David Pomerenke committed on
Commit
edcfb8f
·
1 Parent(s): 29c8ef6

Run on all languages

Browse files
Files changed (4) hide show
  1. app.py +1 -1
  2. evals.py +1 -2
  3. results.json +3489 -194
  4. uv.lock +1 -1
app.py CHANGED
@@ -414,7 +414,7 @@ def create_world_map(results):
414
  xanchor="center",
415
  thickness=20, # make it a bit thicker when horizontal
416
  ),
417
- zmin=0.2,
418
  zmax=0.5,
419
  )
420
  )
 
414
  xanchor="center",
415
  thickness=20, # make it a bit thicker when horizontal
416
  ),
417
+ zmin=0.1,
418
  zmax=0.5,
419
  )
420
  )
evals.py CHANGED
@@ -139,14 +139,13 @@ languages = pd.merge(
139
  languages["in_benchmark"] = languages["bcp_47"].isin(benchmark_languages["bcp_47"])
140
 
141
  languages = languages.sort_values(by="speakers", ascending=False)
142
- languages = languages.iloc[:30]
143
 
144
  # sample languages to translate to
145
  target_languages = languages[languages["in_benchmark"]].sample(
146
  n=n_sentences, weights="speakers", replace=True, random_state=42
147
  )
148
  # sample languages to analyze with all models
149
- detailed_languages = languages[languages["in_benchmark"]].sample(n=10, random_state=42)
150
 
151
 
152
  # utils
 
139
  languages["in_benchmark"] = languages["bcp_47"].isin(benchmark_languages["bcp_47"])
140
 
141
  languages = languages.sort_values(by="speakers", ascending=False)
 
142
 
143
  # sample languages to translate to
144
  target_languages = languages[languages["in_benchmark"]].sample(
145
  n=n_sentences, weights="speakers", replace=True, random_state=42
146
  )
147
  # sample languages to analyze with all models
148
+ detailed_languages = languages[languages["in_benchmark"]].sample(n=30, random_state=42)
149
 
150
 
151
  # utils
results.json CHANGED
@@ -4,32 +4,12 @@
4
  "bcp_47": "en",
5
  "speakers": 1636485840,
6
  "scores": [
7
- {
8
- "model": "openai/gpt-4o-mini",
9
- "bleu": 0.5292544231540742
10
- },
11
  {
12
  "model": "meta-llama/llama-3.3-70b-instruct",
13
  "bleu": 0.465648126623753
14
- },
15
- {
16
- "model": "mistralai/mistral-small-24b-instruct-2501",
17
- "bleu": 0.478174166015779
18
- },
19
- {
20
- "model": "google/gemini-2.0-flash-001",
21
- "bleu": 0.5266708610727185
22
- },
23
- {
24
- "model": "deepseek/deepseek-chat",
25
- "bleu": 0.5549134525314846
26
- },
27
- {
28
- "model": "microsoft/phi-4",
29
- "bleu": 0.4668163276973811
30
  }
31
  ],
32
- "bleu": 0.5035795595158651,
33
  "commonvoice_hours": 2649.0,
34
  "commonvoice_locale": "en",
35
  "population": {
@@ -482,32 +462,12 @@
482
  "bcp_47": "pt",
483
  "speakers": 237496885,
484
  "scores": [
485
- {
486
- "model": "openai/gpt-4o-mini",
487
- "bleu": 0.36418677020025814
488
- },
489
  {
490
  "model": "meta-llama/llama-3.3-70b-instruct",
491
  "bleu": 0.36847793827413045
492
- },
493
- {
494
- "model": "mistralai/mistral-small-24b-instruct-2501",
495
- "bleu": 0.33146858114564615
496
- },
497
- {
498
- "model": "google/gemini-2.0-flash-001",
499
- "bleu": 0.3685111782334586
500
- },
501
- {
502
- "model": "deepseek/deepseek-chat",
503
- "bleu": 0.41976380092637283
504
- },
505
- {
506
- "model": "microsoft/phi-4",
507
- "bleu": 0.35431476252948624
508
  }
509
  ],
510
- "bleu": 0.367787171884892,
511
  "commonvoice_hours": 176.0,
512
  "commonvoice_locale": "pt",
513
  "population": {
@@ -615,32 +575,12 @@
615
  "bcp_47": "sw",
616
  "speakers": 171610296,
617
  "scores": [
618
- {
619
- "model": "openai/gpt-4o-mini",
620
- "bleu": 0.3240516590412694
621
- },
622
  {
623
  "model": "meta-llama/llama-3.3-70b-instruct",
624
  "bleu": 0.3021494866906426
625
- },
626
- {
627
- "model": "mistralai/mistral-small-24b-instruct-2501",
628
- "bleu": 0.21392015063903014
629
- },
630
- {
631
- "model": "google/gemini-2.0-flash-001",
632
- "bleu": 0.39351510575974585
633
- },
634
- {
635
- "model": "deepseek/deepseek-chat",
636
- "bleu": 0.32036034973159405
637
- },
638
- {
639
- "model": "microsoft/phi-4",
640
- "bleu": 0.2572750657835761
641
  }
642
  ],
643
- "bleu": 0.3018786362743097,
644
  "commonvoice_hours": 411.0,
645
  "commonvoice_locale": "sw",
646
  "population": {
@@ -660,32 +600,12 @@
660
  "bcp_47": "id",
661
  "speakers": 171207687,
662
  "scores": [
663
- {
664
- "model": "openai/gpt-4o-mini",
665
- "bleu": 0.31923635687963403
666
- },
667
  {
668
  "model": "meta-llama/llama-3.3-70b-instruct",
669
  "bleu": 0.32764790212460226
670
- },
671
- {
672
- "model": "mistralai/mistral-small-24b-instruct-2501",
673
- "bleu": 0.2387340248344293
674
- },
675
- {
676
- "model": "google/gemini-2.0-flash-001",
677
- "bleu": 0.36831341439353155
678
- },
679
- {
680
- "model": "deepseek/deepseek-chat",
681
- "bleu": 0.3614031163582736
682
- },
683
- {
684
- "model": "microsoft/phi-4",
685
- "bleu": 0.2526105547535859
686
  }
687
  ],
688
- "bleu": 0.31132422822400946,
689
  "commonvoice_hours": 33.0,
690
  "commonvoice_locale": "id",
691
  "population": {
@@ -698,32 +618,12 @@
698
  "bcp_47": "de",
699
  "speakers": 136350226,
700
  "scores": [
701
- {
702
- "model": "openai/gpt-4o-mini",
703
- "bleu": 0.39299196408709347
704
- },
705
  {
706
  "model": "meta-llama/llama-3.3-70b-instruct",
707
  "bleu": 0.3886659265736507
708
- },
709
- {
710
- "model": "mistralai/mistral-small-24b-instruct-2501",
711
- "bleu": 0.35731041330816654
712
- },
713
- {
714
- "model": "google/gemini-2.0-flash-001",
715
- "bleu": 0.46630655663486287
716
- },
717
- {
718
- "model": "deepseek/deepseek-chat",
719
- "bleu": 0.4373279553229372
720
- },
721
- {
722
- "model": "microsoft/phi-4",
723
- "bleu": 0.353010712972096
724
  }
725
  ],
726
- "bleu": 0.3992689214831344,
727
  "commonvoice_hours": 1357.0,
728
  "commonvoice_locale": "de",
729
  "population": {
@@ -780,12 +680,32 @@
780
  "bcp_47": "te",
781
  "speakers": 95478480,
782
  "scores": [
 
 
 
 
783
  {
784
  "model": "meta-llama/llama-3.3-70b-instruct",
785
  "bleu": 0.37949545228579734
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
786
  }
787
  ],
788
- "bleu": 0.37949545228579734,
789
  "commonvoice_hours": 0.3,
790
  "commonvoice_locale": "te",
791
  "population": {
@@ -797,12 +717,32 @@
797
  "bcp_47": "mr",
798
  "speakers": 92826300,
799
  "scores": [
 
 
 
 
800
  {
801
  "model": "meta-llama/llama-3.3-70b-instruct",
802
  "bleu": 0.2852384896861461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
  }
804
  ],
805
- "bleu": 0.2852384896861461,
806
  "commonvoice_hours": 20.0,
807
  "commonvoice_locale": "mr",
808
  "population": {
@@ -814,32 +754,12 @@
814
  "bcp_47": "jv",
815
  "speakers": 91180665,
816
  "scores": [
817
- {
818
- "model": "openai/gpt-4o-mini",
819
- "bleu": 0.2755399920693052
820
- },
821
  {
822
  "model": "meta-llama/llama-3.3-70b-instruct",
823
  "bleu": 0.2494035065095152
824
- },
825
- {
826
- "model": "mistralai/mistral-small-24b-instruct-2501",
827
- "bleu": 0.1266725662438766
828
- },
829
- {
830
- "model": "google/gemini-2.0-flash-001",
831
- "bleu": 0.35614761567604236
832
- },
833
- {
834
- "model": "deepseek/deepseek-chat",
835
- "bleu": 0.29069945440951733
836
- },
837
- {
838
- "model": "microsoft/phi-4",
839
- "bleu": 0.20468330413608699
840
  }
841
  ],
842
- "bleu": 0.2505244065073906,
843
  "commonvoice_hours": 0.0,
844
  "commonvoice_locale": "jv",
845
  "population": {
@@ -852,12 +772,32 @@
852
  "bcp_47": "vi",
853
  "speakers": 86222962,
854
  "scores": [
 
 
 
 
855
  {
856
  "model": "meta-llama/llama-3.3-70b-instruct",
857
  "bleu": 0.2956750563565745
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  }
859
  ],
860
- "bleu": 0.2956750563565745,
861
  "commonvoice_hours": 5.9,
862
  "commonvoice_locale": "vi",
863
  "population": {
@@ -872,12 +812,32 @@
872
  "bcp_47": "ta",
873
  "speakers": 85616159,
874
  "scores": [
 
 
 
 
875
  {
876
  "model": "meta-llama/llama-3.3-70b-instruct",
877
  "bleu": 0.27547489589987734
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878
  }
879
  ],
880
- "bleu": 0.27547489589987734,
881
  "commonvoice_hours": 234.0,
882
  "commonvoice_locale": "ta",
883
  "population": {
@@ -921,32 +881,12 @@
921
  "bcp_47": "tr",
922
  "speakers": 80360704,
923
  "scores": [
924
- {
925
- "model": "openai/gpt-4o-mini",
926
- "bleu": 0.32005697883543305
927
- },
928
  {
929
  "model": "meta-llama/llama-3.3-70b-instruct",
930
  "bleu": 0.3128582218784996
931
- },
932
- {
933
- "model": "mistralai/mistral-small-24b-instruct-2501",
934
- "bleu": 0.26166377989267786
935
- },
936
- {
937
- "model": "google/gemini-2.0-flash-001",
938
- "bleu": 0.3488811534537982
939
- },
940
- {
941
- "model": "deepseek/deepseek-chat",
942
- "bleu": 0.352126761953689
943
- },
944
- {
945
- "model": "microsoft/phi-4",
946
- "bleu": 0.22855630110633351
947
  }
948
  ],
949
- "bleu": 0.30402386618673855,
950
  "commonvoice_hours": 127.0,
951
  "commonvoice_locale": "tr",
952
  "population": {
@@ -969,32 +909,12 @@
969
  "bcp_47": "yue",
970
  "speakers": 79654759,
971
  "scores": [
972
- {
973
- "model": "openai/gpt-4o-mini",
974
- "bleu": 0.25523473174207373
975
- },
976
  {
977
  "model": "meta-llama/llama-3.3-70b-instruct",
978
  "bleu": 0.2901127503841879
979
- },
980
- {
981
- "model": "mistralai/mistral-small-24b-instruct-2501",
982
- "bleu": 0.23880603698191288
983
- },
984
- {
985
- "model": "google/gemini-2.0-flash-001",
986
- "bleu": 0.33330775674699475
987
- },
988
- {
989
- "model": "deepseek/deepseek-chat",
990
- "bleu": 0.30942219437451896
991
- },
992
- {
993
- "model": "microsoft/phi-4",
994
- "bleu": 0.25167599008414604
995
  }
996
  ],
997
- "bleu": 0.27975991005230577,
998
  "commonvoice_hours": 203.0,
999
  "commonvoice_locale": "yue",
1000
  "population": {
@@ -1031,12 +951,32 @@
1031
  "bcp_47": "it",
1032
  "speakers": 70247060,
1033
  "scores": [
 
 
 
 
1034
  {
1035
  "model": "meta-llama/llama-3.3-70b-instruct",
1036
  "bleu": 0.3273249067267197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1037
  }
1038
  ],
1039
- "bleu": 0.3273249067267197,
1040
  "commonvoice_hours": 362.0,
1041
  "commonvoice_locale": "it",
1042
  "population": {
@@ -1062,32 +1002,12 @@
1062
  "bcp_47": "fil",
1063
  "speakers": 67471096,
1064
  "scores": [
1065
- {
1066
- "model": "openai/gpt-4o-mini",
1067
- "bleu": 0.35950288667055635
1068
- },
1069
  {
1070
  "model": "meta-llama/llama-3.3-70b-instruct",
1071
  "bleu": 0.3458571802193247
1072
- },
1073
- {
1074
- "model": "mistralai/mistral-small-24b-instruct-2501",
1075
- "bleu": 0.2769096553598123
1076
- },
1077
- {
1078
- "model": "google/gemini-2.0-flash-001",
1079
- "bleu": 0.4030081046637165
1080
- },
1081
- {
1082
- "model": "deepseek/deepseek-chat",
1083
- "bleu": 0.3712699611966998
1084
- },
1085
- {
1086
- "model": "microsoft/phi-4",
1087
- "bleu": 0.25550756070033753
1088
  }
1089
  ],
1090
- "bleu": 0.3353425581350746,
1091
  "commonvoice_hours": 0.0,
1092
  "commonvoice_locale": "tl",
1093
  "population": {
@@ -1132,5 +1052,3380 @@
1132
  "IN": 59674050,
1133
  "KE": 4978
1134
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1135
  }
1136
  ]
 
4
  "bcp_47": "en",
5
  "speakers": 1636485840,
6
  "scores": [
 
 
 
 
7
  {
8
  "model": "meta-llama/llama-3.3-70b-instruct",
9
  "bleu": 0.465648126623753
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  }
11
  ],
12
+ "bleu": 0.465648126623753,
13
  "commonvoice_hours": 2649.0,
14
  "commonvoice_locale": "en",
15
  "population": {
 
462
  "bcp_47": "pt",
463
  "speakers": 237496885,
464
  "scores": [
 
 
 
 
465
  {
466
  "model": "meta-llama/llama-3.3-70b-instruct",
467
  "bleu": 0.36847793827413045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  }
469
  ],
470
+ "bleu": 0.36847793827413045,
471
  "commonvoice_hours": 176.0,
472
  "commonvoice_locale": "pt",
473
  "population": {
 
575
  "bcp_47": "sw",
576
  "speakers": 171610296,
577
  "scores": [
 
 
 
 
578
  {
579
  "model": "meta-llama/llama-3.3-70b-instruct",
580
  "bleu": 0.3021494866906426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
  }
582
  ],
583
+ "bleu": 0.3021494866906426,
584
  "commonvoice_hours": 411.0,
585
  "commonvoice_locale": "sw",
586
  "population": {
 
600
  "bcp_47": "id",
601
  "speakers": 171207687,
602
  "scores": [
 
 
 
 
603
  {
604
  "model": "meta-llama/llama-3.3-70b-instruct",
605
  "bleu": 0.32764790212460226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
  }
607
  ],
608
+ "bleu": 0.32764790212460226,
609
  "commonvoice_hours": 33.0,
610
  "commonvoice_locale": "id",
611
  "population": {
 
618
  "bcp_47": "de",
619
  "speakers": 136350226,
620
  "scores": [
 
 
 
 
621
  {
622
  "model": "meta-llama/llama-3.3-70b-instruct",
623
  "bleu": 0.3886659265736507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
  }
625
  ],
626
+ "bleu": 0.3886659265736507,
627
  "commonvoice_hours": 1357.0,
628
  "commonvoice_locale": "de",
629
  "population": {
 
680
  "bcp_47": "te",
681
  "speakers": 95478480,
682
  "scores": [
683
+ {
684
+ "model": "openai/gpt-4o-mini",
685
+ "bleu": 0.28964452051612244
686
+ },
687
  {
688
  "model": "meta-llama/llama-3.3-70b-instruct",
689
  "bleu": 0.37949545228579734
690
+ },
691
+ {
692
+ "model": "mistralai/mistral-small-24b-instruct-2501",
693
+ "bleu": 0.1314237858560668
694
+ },
695
+ {
696
+ "model": "google/gemini-2.0-flash-001",
697
+ "bleu": 0.35187073123584545
698
+ },
699
+ {
700
+ "model": "deepseek/deepseek-chat",
701
+ "bleu": 0.2808664068178743
702
+ },
703
+ {
704
+ "model": "microsoft/phi-4",
705
+ "bleu": 0.16468103557770178
706
  }
707
  ],
708
+ "bleu": 0.2663303220482347,
709
  "commonvoice_hours": 0.3,
710
  "commonvoice_locale": "te",
711
  "population": {
 
717
  "bcp_47": "mr",
718
  "speakers": 92826300,
719
  "scores": [
720
+ {
721
+ "model": "openai/gpt-4o-mini",
722
+ "bleu": 0.235200323237626
723
+ },
724
  {
725
  "model": "meta-llama/llama-3.3-70b-instruct",
726
  "bleu": 0.2852384896861461
727
+ },
728
+ {
729
+ "model": "mistralai/mistral-small-24b-instruct-2501",
730
+ "bleu": 0.1158656438579424
731
+ },
732
+ {
733
+ "model": "google/gemini-2.0-flash-001",
734
+ "bleu": 0.3039098126596327
735
+ },
736
+ {
737
+ "model": "deepseek/deepseek-chat",
738
+ "bleu": 0.23702154369195902
739
+ },
740
+ {
741
+ "model": "microsoft/phi-4",
742
+ "bleu": 0.14770612974379574
743
  }
744
  ],
745
+ "bleu": 0.22082365714618368,
746
  "commonvoice_hours": 20.0,
747
  "commonvoice_locale": "mr",
748
  "population": {
 
754
  "bcp_47": "jv",
755
  "speakers": 91180665,
756
  "scores": [
 
 
 
 
757
  {
758
  "model": "meta-llama/llama-3.3-70b-instruct",
759
  "bleu": 0.2494035065095152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
  }
761
  ],
762
+ "bleu": 0.2494035065095152,
763
  "commonvoice_hours": 0.0,
764
  "commonvoice_locale": "jv",
765
  "population": {
 
772
  "bcp_47": "vi",
773
  "speakers": 86222962,
774
  "scores": [
775
+ {
776
+ "model": "openai/gpt-4o-mini",
777
+ "bleu": 0.2790022403255029
778
+ },
779
  {
780
  "model": "meta-llama/llama-3.3-70b-instruct",
781
  "bleu": 0.2956750563565745
782
+ },
783
+ {
784
+ "model": "mistralai/mistral-small-24b-instruct-2501",
785
+ "bleu": 0.17142561681893811
786
+ },
787
+ {
788
+ "model": "google/gemini-2.0-flash-001",
789
+ "bleu": 0.32076560886563743
790
+ },
791
+ {
792
+ "model": "deepseek/deepseek-chat",
793
+ "bleu": 0.269842412561934
794
+ },
795
+ {
796
+ "model": "microsoft/phi-4",
797
+ "bleu": 0.19118351096728373
798
  }
799
  ],
800
+ "bleu": 0.25464907431597844,
801
  "commonvoice_hours": 5.9,
802
  "commonvoice_locale": "vi",
803
  "population": {
 
812
  "bcp_47": "ta",
813
  "speakers": 85616159,
814
  "scores": [
815
+ {
816
+ "model": "openai/gpt-4o-mini",
817
+ "bleu": 0.2159676106476219
818
+ },
819
  {
820
  "model": "meta-llama/llama-3.3-70b-instruct",
821
  "bleu": 0.27547489589987734
822
+ },
823
+ {
824
+ "model": "mistralai/mistral-small-24b-instruct-2501",
825
+ "bleu": 0.11055269618146167
826
+ },
827
+ {
828
+ "model": "google/gemini-2.0-flash-001",
829
+ "bleu": 0.2993653070835946
830
+ },
831
+ {
832
+ "model": "deepseek/deepseek-chat",
833
+ "bleu": 0.22772498517043588
834
+ },
835
+ {
836
+ "model": "microsoft/phi-4",
837
+ "bleu": 0.14949134449145374
838
  }
839
  ],
840
+ "bleu": 0.21309613991240753,
841
  "commonvoice_hours": 234.0,
842
  "commonvoice_locale": "ta",
843
  "population": {
 
881
  "bcp_47": "tr",
882
  "speakers": 80360704,
883
  "scores": [
 
 
 
 
884
  {
885
  "model": "meta-llama/llama-3.3-70b-instruct",
886
  "bleu": 0.3128582218784996
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
887
  }
888
  ],
889
+ "bleu": 0.3128582218784996,
890
  "commonvoice_hours": 127.0,
891
  "commonvoice_locale": "tr",
892
  "population": {
 
909
  "bcp_47": "yue",
910
  "speakers": 79654759,
911
  "scores": [
 
 
 
 
912
  {
913
  "model": "meta-llama/llama-3.3-70b-instruct",
914
  "bleu": 0.2901127503841879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
915
  }
916
  ],
917
+ "bleu": 0.2901127503841879,
918
  "commonvoice_hours": 203.0,
919
  "commonvoice_locale": "yue",
920
  "population": {
 
951
  "bcp_47": "it",
952
  "speakers": 70247060,
953
  "scores": [
954
+ {
955
+ "model": "openai/gpt-4o-mini",
956
+ "bleu": 0.29744196180619636
957
+ },
958
  {
959
  "model": "meta-llama/llama-3.3-70b-instruct",
960
  "bleu": 0.3273249067267197
961
+ },
962
+ {
963
+ "model": "mistralai/mistral-small-24b-instruct-2501",
964
+ "bleu": 0.26709213193768344
965
+ },
966
+ {
967
+ "model": "google/gemini-2.0-flash-001",
968
+ "bleu": 0.34518602347709243
969
+ },
970
+ {
971
+ "model": "deepseek/deepseek-chat",
972
+ "bleu": 0.3136120219290237
973
+ },
974
+ {
975
+ "model": "microsoft/phi-4",
976
+ "bleu": 0.26629405288011837
977
  }
978
  ],
979
+ "bleu": 0.302825183126139,
980
  "commonvoice_hours": 362.0,
981
  "commonvoice_locale": "it",
982
  "population": {
 
1002
  "bcp_47": "fil",
1003
  "speakers": 67471096,
1004
  "scores": [
 
 
 
 
1005
  {
1006
  "model": "meta-llama/llama-3.3-70b-instruct",
1007
  "bleu": 0.3458571802193247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
  }
1009
  ],
1010
+ "bleu": 0.3458571802193247,
1011
  "commonvoice_hours": 0.0,
1012
  "commonvoice_locale": "tl",
1013
  "population": {
 
1052
  "IN": 59674050,
1053
  "KE": 4978
1054
  }
1055
+ },
1056
+ {
1057
+ "language_name": "Thai",
1058
+ "bcp_47": "th",
1059
+ "speakers": 55181920,
1060
+ "scores": [
1061
+ {
1062
+ "model": "meta-llama/llama-3.3-70b-instruct",
1063
+ "bleu": 0.2461561900137243
1064
+ }
1065
+ ],
1066
+ "bleu": 0.2461561900137243,
1067
+ "commonvoice_hours": 172.0,
1068
+ "commonvoice_locale": "th",
1069
+ "population": {
1070
+ "TH": 55181920
1071
+ }
1072
+ },
1073
+ {
1074
+ "language_name": "Kannada",
1075
+ "bcp_47": "kn",
1076
+ "speakers": 49065330,
1077
+ "scores": [
1078
+ {
1079
+ "model": "meta-llama/llama-3.3-70b-instruct",
1080
+ "bleu": 0.25650866519447973
1081
+ }
1082
+ ],
1083
+ "bleu": 0.25650866519447973,
1084
+ "commonvoice_hours": 0.0,
1085
+ "commonvoice_locale": "kn",
1086
+ "population": {
1087
+ "IN": 49065330
1088
+ }
1089
+ },
1090
+ {
1091
+ "language_name": "Malayalam",
1092
+ "bcp_47": "ml",
1093
+ "speakers": 43257484,
1094
+ "scores": [
1095
+ {
1096
+ "model": "openai/gpt-4o-mini",
1097
+ "bleu": 0.23073727076678055
1098
+ },
1099
+ {
1100
+ "model": "meta-llama/llama-3.3-70b-instruct",
1101
+ "bleu": 0.21782657144614825
1102
+ },
1103
+ {
1104
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1105
+ "bleu": 0.1695641998407403
1106
+ },
1107
+ {
1108
+ "model": "google/gemini-2.0-flash-001",
1109
+ "bleu": 0.3195014249623395
1110
+ },
1111
+ {
1112
+ "model": "deepseek/deepseek-chat",
1113
+ "bleu": 0.2633223158501049
1114
+ },
1115
+ {
1116
+ "model": "microsoft/phi-4",
1117
+ "bleu": 0.19162873119255258
1118
+ }
1119
+ ],
1120
+ "bleu": 0.23209675234311103,
1121
+ "commonvoice_hours": 2.8,
1122
+ "commonvoice_locale": "ml",
1123
+ "population": {
1124
+ "AE": 699446,
1125
+ "BH": 49665,
1126
+ "IL": 7981,
1127
+ "IN": 42434880,
1128
+ "MY": 48978,
1129
+ "QA": 6599,
1130
+ "SG": 9935
1131
+ }
1132
+ },
1133
+ {
1134
+ "language_name": "Odia",
1135
+ "bcp_47": "or",
1136
+ "speakers": 42434880,
1137
+ "scores": [
1138
+ {
1139
+ "model": "meta-llama/llama-3.3-70b-instruct",
1140
+ "bleu": 0.2616054244059909
1141
+ }
1142
+ ],
1143
+ "bleu": 0.2616054244059909,
1144
+ "commonvoice_hours": 2.8,
1145
+ "commonvoice_locale": "or",
1146
+ "population": {
1147
+ "IN": 42434880
1148
+ }
1149
+ },
1150
+ {
1151
+ "language_name": "Polish",
1152
+ "bcp_47": "pl",
1153
+ "speakers": 41077399,
1154
+ "scores": [
1155
+ {
1156
+ "model": "meta-llama/llama-3.3-70b-instruct",
1157
+ "bleu": 0.24382878885531348
1158
+ }
1159
+ ],
1160
+ "bleu": 0.24382878885531348,
1161
+ "commonvoice_hours": 174.0,
1162
+ "commonvoice_locale": "pl",
1163
+ "population": {
1164
+ "CA": 173393,
1165
+ "CZ": 52442,
1166
+ "DE": 232463,
1167
+ "GB": 2630444,
1168
+ "IL": 130132,
1169
+ "PL": 36751008,
1170
+ "RO": 2769,
1171
+ "SK": 50598,
1172
+ "UA": 1054150
1173
+ }
1174
+ },
1175
+ {
1176
+ "language_name": "Hausa",
1177
+ "bcp_47": "ha",
1178
+ "speakers": 40411882,
1179
+ "scores": [
1180
+ {
1181
+ "model": "meta-llama/llama-3.3-70b-instruct",
1182
+ "bleu": 0.1493358875548207
1183
+ }
1184
+ ],
1185
+ "bleu": 0.1493358875548207,
1186
+ "commonvoice_hours": 4.1,
1187
+ "commonvoice_locale": "ha",
1188
+ "population": {
1189
+ "CM": 38843,
1190
+ "GH": 252326,
1191
+ "NE": 9336684,
1192
+ "NG": 29963920,
1193
+ "SD": 820109
1194
+ }
1195
+ },
1196
+ {
1197
+ "language_name": "Sindhi",
1198
+ "bcp_47": "sd",
1199
+ "speakers": 40329510,
1200
+ "scores": [
1201
+ {
1202
+ "model": "meta-llama/llama-3.3-70b-instruct",
1203
+ "bleu": 0.22524971121549384
1204
+ }
1205
+ ],
1206
+ "bleu": 0.22524971121549384,
1207
+ "commonvoice_hours": 0.4,
1208
+ "commonvoice_locale": "sd",
1209
+ "population": {
1210
+ "IN": 5304360,
1211
+ "PK": 35025150
1212
+ }
1213
+ },
1214
+ {
1215
+ "language_name": "Malay",
1216
+ "bcp_47": "ms",
1217
+ "speakers": 38097307,
1218
+ "scores": [
1219
+ {
1220
+ "model": "openai/gpt-4o-mini",
1221
+ "bleu": 0.27545115634664297
1222
+ },
1223
+ {
1224
+ "model": "meta-llama/llama-3.3-70b-instruct",
1225
+ "bleu": 0.2445459295400275
1226
+ },
1227
+ {
1228
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1229
+ "bleu": 0.23380594556884363
1230
+ },
1231
+ {
1232
+ "model": "google/gemini-2.0-flash-001",
1233
+ "bleu": 0.3521510571182875
1234
+ },
1235
+ {
1236
+ "model": "deepseek/deepseek-chat",
1237
+ "bleu": 0.2984537737499322
1238
+ },
1239
+ {
1240
+ "model": "microsoft/phi-4",
1241
+ "bleu": 0.20974647653543713
1242
+ }
1243
+ ],
1244
+ "bleu": 0.2690257231431951,
1245
+ "commonvoice_hours": 0.0,
1246
+ "commonvoice_locale": "ms",
1247
+ "population": {
1248
+ "BN": 455189,
1249
+ "CC": 495,
1250
+ "ID": 12283196,
1251
+ "MY": 24489075,
1252
+ "SG": 869352
1253
+ }
1254
+ },
1255
+ {
1256
+ "language_name": "Burmese",
1257
+ "bcp_47": "my",
1258
+ "speakers": 36559231,
1259
+ "scores": [
1260
+ {
1261
+ "model": "meta-llama/llama-3.3-70b-instruct",
1262
+ "bleu": 0.17529594258449108
1263
+ }
1264
+ ],
1265
+ "bleu": 0.17529594258449108,
1266
+ "commonvoice_hours": 0.0,
1267
+ "commonvoice_locale": "my",
1268
+ "population": {
1269
+ "BD": 341567,
1270
+ "MM": 36217664
1271
+ }
1272
+ },
1273
+ {
1274
+ "language_name": "Amharic",
1275
+ "bcp_47": "am",
1276
+ "speakers": 35728475,
1277
+ "scores": [
1278
+ {
1279
+ "model": "meta-llama/llama-3.3-70b-instruct",
1280
+ "bleu": 0.16453710162389373
1281
+ }
1282
+ ],
1283
+ "bleu": 0.16453710162389373,
1284
+ "commonvoice_hours": 1.8,
1285
+ "commonvoice_locale": "am",
1286
+ "population": {
1287
+ "ET": 35677290,
1288
+ "IL": 51185
1289
+ }
1290
+ },
1291
+ {
1292
+ "language_name": "Oromo",
1293
+ "bcp_47": "om",
1294
+ "speakers": 34897121,
1295
+ "scores": [
1296
+ {
1297
+ "model": "meta-llama/llama-3.3-70b-instruct",
1298
+ "bleu": 0.0691897827221633
1299
+ }
1300
+ ],
1301
+ "bleu": 0.0691897827221633,
1302
+ "commonvoice_hours": 0.0,
1303
+ "commonvoice_locale": "om",
1304
+ "population": {
1305
+ "ET": 34596160,
1306
+ "KE": 251581,
1307
+ "SO": 49380
1308
+ }
1309
+ },
1310
+ {
1311
+ "language_name": "Bhojpuri",
1312
+ "bcp_47": "bho",
1313
+ "speakers": 32934797,
1314
+ "scores": [
1315
+ {
1316
+ "model": "meta-llama/llama-3.3-70b-instruct",
1317
+ "bleu": 0.23176648838308359
1318
+ }
1319
+ ],
1320
+ "bleu": 0.23176648838308359,
1321
+ "commonvoice_hours": null,
1322
+ "commonvoice_locale": null,
1323
+ "population": {
1324
+ "IN": 30500070,
1325
+ "MU": 372430,
1326
+ "NP": 2062297
1327
+ }
1328
+ },
1329
+ {
1330
+ "language_name": "Uzbek",
1331
+ "bcp_47": "uz",
1332
+ "speakers": 32792780,
1333
+ "scores": [
1334
+ {
1335
+ "model": "meta-llama/llama-3.3-70b-instruct",
1336
+ "bleu": 0.2038544554531401
1337
+ }
1338
+ ],
1339
+ "bleu": 0.2038544554531401,
1340
+ "commonvoice_hours": 100.0,
1341
+ "commonvoice_locale": "uz",
1342
+ "population": {
1343
+ "AF": 1722259,
1344
+ "CN": 5576,
1345
+ "TM": 497577,
1346
+ "TR": 1968,
1347
+ "UZ": 30565400
1348
+ }
1349
+ },
1350
+ {
1351
+ "language_name": "Azerbaijani",
1352
+ "bcp_47": "az",
1353
+ "speakers": 32446682,
1354
+ "scores": [
1355
+ {
1356
+ "model": "meta-llama/llama-3.3-70b-instruct",
1357
+ "bleu": 0.1911853993562902
1358
+ }
1359
+ ],
1360
+ "bleu": 0.1911853993562902,
1361
+ "commonvoice_hours": 0.5,
1362
+ "commonvoice_locale": "az",
1363
+ "population": {
1364
+ "AM": 0,
1365
+ "AZ": 10093536,
1366
+ "IQ": 699709,
1367
+ "IR": 20381592,
1368
+ "RU": 131801,
1369
+ "TR": 1140044
1370
+ }
1371
+ },
1372
+ {
1373
+ "language_name": "Sundanese",
1374
+ "bcp_47": "su",
1375
+ "speakers": 32043120,
1376
+ "scores": [
1377
+ {
1378
+ "model": "openai/gpt-4o-mini",
1379
+ "bleu": 0.18638464691782505
1380
+ },
1381
+ {
1382
+ "model": "meta-llama/llama-3.3-70b-instruct",
1383
+ "bleu": 0.22413489641063433
1384
+ },
1385
+ {
1386
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1387
+ "bleu": 0.12396443500389862
1388
+ },
1389
+ {
1390
+ "model": "google/gemini-2.0-flash-001",
1391
+ "bleu": 0.31100961611618916
1392
+ },
1393
+ {
1394
+ "model": "deepseek/deepseek-chat",
1395
+ "bleu": 0.23467068441612768
1396
+ },
1397
+ {
1398
+ "model": "microsoft/phi-4",
1399
+ "bleu": 0.1315217916431994
1400
+ }
1401
+ ],
1402
+ "bleu": 0.20194767841797903,
1403
+ "commonvoice_hours": null,
1404
+ "commonvoice_locale": null,
1405
+ "population": {
1406
+ "ID": 32043120
1407
+ }
1408
+ },
1409
+ {
1410
+ "language_name": "Dutch",
1411
+ "bcp_47": "nl",
1412
+ "speakers": 31765645,
1413
+ "scores": [
1414
+ {
1415
+ "model": "meta-llama/llama-3.3-70b-instruct",
1416
+ "bleu": 0.2592795402705898
1417
+ }
1418
+ ],
1419
+ "bleu": 0.2592795402705898,
1420
+ "commonvoice_hours": 114.0,
1421
+ "commonvoice_locale": "nl",
1422
+ "population": {
1423
+ "AW": 115845,
1424
+ "BE": 6446385,
1425
+ "BQ": 1600,
1426
+ "CA": 56541,
1427
+ "CW": 12108,
1428
+ "DE": 7214373,
1429
+ "FR": 88203,
1430
+ "NL": 17280400,
1431
+ "SR": 548612,
1432
+ "SX": 1578
1433
+ }
1434
+ },
1435
+ {
1436
+ "language_name": "Moroccan Arabic",
1437
+ "bcp_47": "ary",
1438
+ "speakers": 30938679,
1439
+ "scores": [
1440
+ {
1441
+ "model": "meta-llama/llama-3.3-70b-instruct",
1442
+ "bleu": 0.1795140543637709
1443
+ }
1444
+ ],
1445
+ "bleu": 0.1795140543637709,
1446
+ "commonvoice_hours": null,
1447
+ "commonvoice_locale": null,
1448
+ "population": {
1449
+ "MA": 30938679
1450
+ }
1451
+ },
1452
+ {
1453
+ "language_name": "Ukrainian",
1454
+ "bcp_47": "uk",
1455
+ "speakers": 29348975,
1456
+ "scores": [
1457
+ {
1458
+ "model": "openai/gpt-4o-mini",
1459
+ "bleu": 0.2564463888571809
1460
+ },
1461
+ {
1462
+ "model": "meta-llama/llama-3.3-70b-instruct",
1463
+ "bleu": 0.2922812040972885
1464
+ },
1465
+ {
1466
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1467
+ "bleu": 0.24225796102393954
1468
+ },
1469
+ {
1470
+ "model": "google/gemini-2.0-flash-001",
1471
+ "bleu": 0.3452563778145038
1472
+ },
1473
+ {
1474
+ "model": "deepseek/deepseek-chat",
1475
+ "bleu": 0.3292127494173498
1476
+ },
1477
+ {
1478
+ "model": "microsoft/phi-4",
1479
+ "bleu": 0.2452825737163755
1480
+ }
1481
+ ],
1482
+ "bleu": 0.28512287582110635,
1483
+ "commonvoice_hours": 98.0,
1484
+ "commonvoice_locale": "uk",
1485
+ "population": {
1486
+ "CA": 75388,
1487
+ "MD": 471030,
1488
+ "PL": 149301,
1489
+ "RS": 0,
1490
+ "SK": 103371,
1491
+ "UA": 28549885
1492
+ }
1493
+ },
1494
+ {
1495
+ "language_name": "Yoruba",
1496
+ "bcp_47": "yo",
1497
+ "speakers": 28685568,
1498
+ "scores": [
1499
+ {
1500
+ "model": "meta-llama/llama-3.3-70b-instruct",
1501
+ "bleu": 0.09852676389614487
1502
+ }
1503
+ ],
1504
+ "bleu": 0.09852676389614487,
1505
+ "commonvoice_hours": 5.9,
1506
+ "commonvoice_locale": "yo",
1507
+ "population": {
1508
+ "BJ": 861928,
1509
+ "NG": 27823640
1510
+ }
1511
+ },
1512
+ {
1513
+ "language_name": "Igbo",
1514
+ "bcp_47": "ig",
1515
+ "speakers": 27823640,
1516
+ "scores": [
1517
+ {
1518
+ "model": "meta-llama/llama-3.3-70b-instruct",
1519
+ "bleu": 0.1549827013842116
1520
+ }
1521
+ ],
1522
+ "bleu": 0.1549827013842116,
1523
+ "commonvoice_hours": 0.0,
1524
+ "commonvoice_locale": "ig",
1525
+ "population": {
1526
+ "NG": 27823640
1527
+ }
1528
+ },
1529
+ {
1530
+ "language_name": "Cebuano",
1531
+ "bcp_47": "ceb",
1532
+ "speakers": 26203440,
1533
+ "scores": [
1534
+ {
1535
+ "model": "meta-llama/llama-3.3-70b-instruct",
1536
+ "bleu": 0.27783470672988303
1537
+ }
1538
+ ],
1539
+ "bleu": 0.27783470672988303,
1540
+ "commonvoice_hours": null,
1541
+ "commonvoice_locale": null,
1542
+ "population": {
1543
+ "PH": 26203440
1544
+ }
1545
+ },
1546
+ {
1547
+ "language_name": "Awadhi",
1548
+ "bcp_47": "awa",
1549
+ "speakers": 25862924,
1550
+ "scores": [
1551
+ {
1552
+ "model": "meta-llama/llama-3.3-70b-instruct",
1553
+ "bleu": 0.2554810263222905
1554
+ }
1555
+ ],
1556
+ "bleu": 0.2554810263222905,
1557
+ "commonvoice_hours": null,
1558
+ "commonvoice_locale": null,
1559
+ "population": {
1560
+ "IN": 25195710,
1561
+ "NP": 667214
1562
+ }
1563
+ },
1564
+ {
1565
+ "language_name": "Malagasy",
1566
+ "bcp_47": "mg",
1567
+ "speakers": 24260130,
1568
+ "scores": [
1569
+ {
1570
+ "model": "meta-llama/llama-3.3-70b-instruct",
1571
+ "bleu": 0.15163299980391426
1572
+ }
1573
+ ],
1574
+ "bleu": 0.15163299980391426,
1575
+ "commonvoice_hours": 0.0,
1576
+ "commonvoice_locale": "mg",
1577
+ "population": {
1578
+ "MG": 24260130
1579
+ }
1580
+ },
1581
+ {
1582
+ "language_name": "Romanian",
1583
+ "bcp_47": "ro",
1584
+ "speakers": 22187408,
1585
+ "scores": [
1586
+ {
1587
+ "model": "openai/gpt-4o-mini",
1588
+ "bleu": 0.33899025568959984
1589
+ },
1590
+ {
1591
+ "model": "meta-llama/llama-3.3-70b-instruct",
1592
+ "bleu": 0.26666997541189236
1593
+ },
1594
+ {
1595
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1596
+ "bleu": 0.24172488724041316
1597
+ },
1598
+ {
1599
+ "model": "google/gemini-2.0-flash-001",
1600
+ "bleu": 0.37365302832845815
1601
+ },
1602
+ {
1603
+ "model": "deepseek/deepseek-chat",
1604
+ "bleu": 0.332600965807992
1605
+ },
1606
+ {
1607
+ "model": "microsoft/phi-4",
1608
+ "bleu": 0.2510789925018768
1609
+ }
1610
+ ],
1611
+ "bleu": 0.30078635083003874,
1612
+ "commonvoice_hours": 21.0,
1613
+ "commonvoice_locale": "ro",
1614
+ "population": {
1615
+ "CA": 101774,
1616
+ "HU": 96741,
1617
+ "IL": 320993,
1618
+ "MD": 2119635,
1619
+ "RO": 19172610,
1620
+ "RS": 147256,
1621
+ "UA": 228399
1622
+ }
1623
+ },
1624
+ {
1625
+ "language_name": "Nepali",
1626
+ "bcp_47": "ne",
1627
+ "speakers": 20903374,
1628
+ "scores": [
1629
+ {
1630
+ "model": "meta-llama/llama-3.3-70b-instruct",
1631
+ "bleu": 0.26199282928489126
1632
+ }
1633
+ ],
1634
+ "bleu": 0.26199282928489126,
1635
+ "commonvoice_hours": 1.3,
1636
+ "commonvoice_locale": "ne-NP",
1637
+ "population": {
1638
+ "BT": 132994,
1639
+ "IN": 7426104,
1640
+ "NP": 13344276
1641
+ }
1642
+ },
1643
+ {
1644
+ "language_name": "Maithili",
1645
+ "bcp_47": "mai",
1646
+ "speakers": 19249149,
1647
+ "scores": [
1648
+ {
1649
+ "model": "meta-llama/llama-3.3-70b-instruct",
1650
+ "bleu": 0.23975507119180453
1651
+ }
1652
+ ],
1653
+ "bleu": 0.23975507119180453,
1654
+ "commonvoice_hours": 0.0,
1655
+ "commonvoice_locale": "mai",
1656
+ "population": {
1657
+ "IN": 15913080,
1658
+ "NP": 3336069
1659
+ }
1660
+ },
1661
+ {
1662
+ "language_name": "Assamese",
1663
+ "bcp_47": "as",
1664
+ "speakers": 17239170,
1665
+ "scores": [
1666
+ {
1667
+ "model": "meta-llama/llama-3.3-70b-instruct",
1668
+ "bleu": 0.19363225565136952
1669
+ }
1670
+ ],
1671
+ "bleu": 0.19363225565136952,
1672
+ "commonvoice_hours": 2.8,
1673
+ "commonvoice_locale": "as",
1674
+ "population": {
1675
+ "IN": 17239170
1676
+ }
1677
+ },
1678
+ {
1679
+ "language_name": "Nyanja",
1680
+ "bcp_47": "ny",
1681
+ "speakers": 17026781,
1682
+ "scores": [
1683
+ {
1684
+ "model": "meta-llama/llama-3.3-70b-instruct",
1685
+ "bleu": 0.09504458945778768
1686
+ }
1687
+ ],
1688
+ "bleu": 0.09504458945778768,
1689
+ "commonvoice_hours": 0.0,
1690
+ "commonvoice_locale": "ny",
1691
+ "population": {
1692
+ "MW": 13353858,
1693
+ "MZ": 782553,
1694
+ "ZM": 2613990,
1695
+ "ZW": 276380
1696
+ }
1697
+ },
1698
+ {
1699
+ "language_name": "Somali",
1700
+ "bcp_47": "so",
1701
+ "speakers": 16911645,
1702
+ "scores": [
1703
+ {
1704
+ "model": "openai/gpt-4o-mini",
1705
+ "bleu": 0.2024994684991584
1706
+ },
1707
+ {
1708
+ "model": "meta-llama/llama-3.3-70b-instruct",
1709
+ "bleu": 0.1532133716194419
1710
+ },
1711
+ {
1712
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1713
+ "bleu": 0.043408496427191995
1714
+ },
1715
+ {
1716
+ "model": "google/gemini-2.0-flash-001",
1717
+ "bleu": 0.3048371831537258
1718
+ },
1719
+ {
1720
+ "model": "deepseek/deepseek-chat",
1721
+ "bleu": 0.21360545410615966
1722
+ },
1723
+ {
1724
+ "model": "microsoft/phi-4",
1725
+ "bleu": 0.06484340154849859
1726
+ }
1727
+ ],
1728
+ "bleu": 0.16373456255902938,
1729
+ "commonvoice_hours": 0.0,
1730
+ "commonvoice_locale": "so",
1731
+ "population": {
1732
+ "CA": 49002,
1733
+ "DJ": 377940,
1734
+ "ET": 6486780,
1735
+ "GB": 131522,
1736
+ "KE": 695863,
1737
+ "SO": 9170538
1738
+ }
1739
+ },
1740
+ {
1741
+ "language_name": "Magahi",
1742
+ "bcp_47": "mag",
1743
+ "speakers": 15913080,
1744
+ "scores": [
1745
+ {
1746
+ "model": "meta-llama/llama-3.3-70b-instruct",
1747
+ "bleu": 0.29925558767802407
1748
+ }
1749
+ ],
1750
+ "bleu": 0.29925558767802407,
1751
+ "commonvoice_hours": null,
1752
+ "commonvoice_locale": null,
1753
+ "population": {
1754
+ "IN": 15913080
1755
+ }
1756
+ },
1757
+ {
1758
+ "language_name": "Serbian",
1759
+ "bcp_47": "sr",
1760
+ "speakers": 15602410,
1761
+ "scores": [
1762
+ {
1763
+ "model": "meta-llama/llama-3.3-70b-instruct",
1764
+ "bleu": 0.26029402164210574
1765
+ }
1766
+ ],
1767
+ "bleu": 0.26029402164210574,
1768
+ "commonvoice_hours": 7.4,
1769
+ "commonvoice_locale": "sr",
1770
+ "population": {
1771
+ "BA": 767118,
1772
+ "CA": 64080,
1773
+ "ME": 640352,
1774
+ "RO": 25563,
1775
+ "RS": 13884096,
1776
+ "RU": 4960,
1777
+ "TR": 22965,
1778
+ "XK": 193276
1779
+ }
1780
+ },
1781
+ {
1782
+ "language_name": "Sinhala",
1783
+ "bcp_47": "si",
1784
+ "speakers": 15564656,
1785
+ "scores": [
1786
+ {
1787
+ "model": "meta-llama/llama-3.3-70b-instruct",
1788
+ "bleu": 0.20259734060180434
1789
+ }
1790
+ ],
1791
+ "bleu": 0.20259734060180434,
1792
+ "commonvoice_hours": 0.0,
1793
+ "commonvoice_locale": "si",
1794
+ "population": {
1795
+ "LK": 15564656
1796
+ }
1797
+ },
1798
+ {
1799
+ "language_name": "Khmer",
1800
+ "bcp_47": "km",
1801
+ "speakers": 15065030,
1802
+ "scores": [
1803
+ {
1804
+ "model": "openai/gpt-4o-mini",
1805
+ "bleu": 0.21699232146684352
1806
+ },
1807
+ {
1808
+ "model": "meta-llama/llama-3.3-70b-instruct",
1809
+ "bleu": 0.21417349432612984
1810
+ },
1811
+ {
1812
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1813
+ "bleu": 0.03287369352293625
1814
+ },
1815
+ {
1816
+ "model": "google/gemini-2.0-flash-001",
1817
+ "bleu": 0.34264229339556035
1818
+ },
1819
+ {
1820
+ "model": "deepseek/deepseek-chat",
1821
+ "bleu": 0.24630515818736093
1822
+ },
1823
+ {
1824
+ "model": "microsoft/phi-4",
1825
+ "bleu": 0.11830648687368288
1826
+ }
1827
+ ],
1828
+ "bleu": 0.1952155746287523,
1829
+ "commonvoice_hours": 0.0,
1830
+ "commonvoice_locale": "km",
1831
+ "population": {
1832
+ "KH": 15065030
1833
+ }
1834
+ },
1835
+ {
1836
+ "language_name": "Chhattisgarhi",
1837
+ "bcp_47": "hne",
1838
+ "speakers": 14586990,
1839
+ "scores": [
1840
+ {
1841
+ "model": "meta-llama/llama-3.3-70b-instruct",
1842
+ "bleu": 0.2212498883000727
1843
+ }
1844
+ ],
1845
+ "bleu": 0.2212498883000727,
1846
+ "commonvoice_hours": null,
1847
+ "commonvoice_locale": null,
1848
+ "population": {
1849
+ "IN": 14586990
1850
+ }
1851
+ },
1852
+ {
1853
+ "language_name": "Nigerian Fulfulde",
1854
+ "bcp_47": "fuv",
1855
+ "speakers": 14339876,
1856
+ "scores": [
1857
+ {
1858
+ "model": "meta-llama/llama-3.3-70b-instruct",
1859
+ "bleu": 0.08190470208193343
1860
+ }
1861
+ ],
1862
+ "bleu": 0.08190470208193343,
1863
+ "commonvoice_hours": null,
1864
+ "commonvoice_locale": null,
1865
+ "population": {
1866
+ "NG": 14339876
1867
+ }
1868
+ },
1869
+ {
1870
+ "language_name": "Zulu",
1871
+ "bcp_47": "zu",
1872
+ "speakers": 13973830,
1873
+ "scores": [
1874
+ {
1875
+ "model": "meta-llama/llama-3.3-70b-instruct",
1876
+ "bleu": 0.15449146502209737
1877
+ }
1878
+ ],
1879
+ "bleu": 0.15449146502209737,
1880
+ "commonvoice_hours": 0.0,
1881
+ "commonvoice_locale": "zu",
1882
+ "population": {
1883
+ "LS": 275706,
1884
+ "MW": 69949,
1885
+ "MZ": 1806,
1886
+ "SZ": 75105,
1887
+ "ZA": 13551264
1888
+ }
1889
+ },
1890
+ {
1891
+ "language_name": "Kazakh",
1892
+ "bcp_47": "kk",
1893
+ "speakers": 13637392,
1894
+ "scores": [
1895
+ {
1896
+ "model": "meta-llama/llama-3.3-70b-instruct",
1897
+ "bleu": 0.2392246097188628
1898
+ }
1899
+ ],
1900
+ "bleu": 0.2392246097188628,
1901
+ "commonvoice_hours": 2.1,
1902
+ "commonvoice_locale": "kk",
1903
+ "population": {
1904
+ "AF": 2015,
1905
+ "CN": 1184917,
1906
+ "IR": 2972,
1907
+ "KZ": 12218816,
1908
+ "MN": 228098,
1909
+ "TR": 574
1910
+ }
1911
+ },
1912
+ {
1913
+ "language_name": "Czech",
1914
+ "bcp_47": "cs",
1915
+ "speakers": 13045532,
1916
+ "scores": [
1917
+ {
1918
+ "model": "meta-llama/llama-3.3-70b-instruct",
1919
+ "bleu": 0.2844520855192069
1920
+ }
1921
+ ],
1922
+ "bleu": 0.2844520855192069,
1923
+ "commonvoice_hours": 74.0,
1924
+ "commonvoice_locale": "cs",
1925
+ "population": {
1926
+ "CZ": 10488450,
1927
+ "SK": 2557082
1928
+ }
1929
+ },
1930
+ {
1931
+ "language_name": "Swedish",
1932
+ "bcp_47": "sv",
1933
+ "speakers": 12932871,
1934
+ "scores": [
1935
+ {
1936
+ "model": "meta-llama/llama-3.3-70b-instruct",
1937
+ "bleu": 0.31838456223051165
1938
+ }
1939
+ ],
1940
+ "bleu": 0.31838456223051165,
1941
+ "commonvoice_hours": 47.0,
1942
+ "commonvoice_locale": "sv-SE",
1943
+ "population": {
1944
+ "AX": 25938,
1945
+ "DK": 763023,
1946
+ "FI": 2451535,
1947
+ "SE": 9692375
1948
+ }
1949
+ },
1950
+ {
1951
+ "language_name": "Hungarian",
1952
+ "bcp_47": "hu",
1953
+ "speakers": 12443430,
1954
+ "scores": [
1955
+ {
1956
+ "model": "meta-llama/llama-3.3-70b-instruct",
1957
+ "bleu": 0.2517614908428288
1958
+ }
1959
+ ],
1960
+ "bleu": 0.2517614908428288,
1961
+ "commonvoice_hours": 92.0,
1962
+ "commonvoice_locale": "hu",
1963
+ "population": {
1964
+ "AT": 23035,
1965
+ "CA": 49002,
1966
+ "HU": 9771830,
1967
+ "IL": 86755,
1968
+ "RO": 1405991,
1969
+ "RS": 336584,
1970
+ "SI": 9252,
1971
+ "SK": 598466,
1972
+ "UA": 162515
1973
+ }
1974
+ },
1975
+ {
1976
+ "language_name": "Greek",
1977
+ "bcp_47": "el",
1978
+ "speakers": 12292242,
1979
+ "scores": [
1980
+ {
1981
+ "model": "openai/gpt-4o-mini",
1982
+ "bleu": 0.24888370153898132
1983
+ },
1984
+ {
1985
+ "model": "meta-llama/llama-3.3-70b-instruct",
1986
+ "bleu": 0.27269196827838943
1987
+ },
1988
+ {
1989
+ "model": "mistralai/mistral-small-24b-instruct-2501",
1990
+ "bleu": 0.21351544070708506
1991
+ },
1992
+ {
1993
+ "model": "google/gemini-2.0-flash-001",
1994
+ "bleu": 0.3500489758234636
1995
+ },
1996
+ {
1997
+ "model": "deepseek/deepseek-chat",
1998
+ "bleu": 0.32858632704534785
1999
+ },
2000
+ {
2001
+ "model": "microsoft/phi-4",
2002
+ "bleu": 0.1903000734693107
2003
+ }
2004
+ ],
2005
+ "bleu": 0.2673377478104297,
2006
+ "commonvoice_hours": 20.0,
2007
+ "commonvoice_locale": "el",
2008
+ "population": {
2009
+ "AL": 58417,
2010
+ "CA": 124391,
2011
+ "CY": 1203346,
2012
+ "DE": 304607,
2013
+ "EG": 63516,
2014
+ "GR": 10501029,
2015
+ "IT": 21841,
2016
+ "RO": 4048,
2017
+ "TR": 4019,
2018
+ "UA": 7028
2019
+ }
2020
+ },
2021
+ {
2022
+ "language_name": "Shona",
2023
+ "bcp_47": "sn",
2024
+ "speakers": 11782503,
2025
+ "scores": [
2026
+ {
2027
+ "model": "openai/gpt-4o-mini",
2028
+ "bleu": 0.10438047654339373
2029
+ },
2030
+ {
2031
+ "model": "meta-llama/llama-3.3-70b-instruct",
2032
+ "bleu": 0.1054140213254438
2033
+ },
2034
+ {
2035
+ "model": "mistralai/mistral-small-24b-instruct-2501",
2036
+ "bleu": 0.049580893458705456
2037
+ },
2038
+ {
2039
+ "model": "google/gemini-2.0-flash-001",
2040
+ "bleu": 0.20979752102494492
2041
+ },
2042
+ {
2043
+ "model": "deepseek/deepseek-chat",
2044
+ "bleu": 0.14066476436038525
2045
+ },
2046
+ {
2047
+ "model": "microsoft/phi-4",
2048
+ "bleu": 0.060530921002659346
2049
+ }
2050
+ ],
2051
+ "bleu": 0.11172809961925541,
2052
+ "commonvoice_hours": null,
2053
+ "commonvoice_locale": null,
2054
+ "population": {
2055
+ "ZW": 11782503
2056
+ }
2057
+ },
2058
+ {
2059
+ "language_name": "Central Kurdish",
2060
+ "bcp_47": "ckb",
2061
+ "speakers": 11086549,
2062
+ "scores": [
2063
+ {
2064
+ "model": "meta-llama/llama-3.3-70b-instruct",
2065
+ "bleu": 0.20654412682006296
2066
+ }
2067
+ ],
2068
+ "bleu": 0.20654412682006296,
2069
+ "commonvoice_hours": 135.0,
2070
+ "commonvoice_locale": "ckb",
2071
+ "population": {
2072
+ "IQ": 7774540,
2073
+ "IR": 3312009
2074
+ }
2075
+ },
2076
+ {
2077
+ "language_name": "Kinyarwanda",
2078
+ "bcp_47": "rw",
2079
+ "speakers": 11083625,
2080
+ "scores": [
2081
+ {
2082
+ "model": "meta-llama/llama-3.3-70b-instruct",
2083
+ "bleu": 0.14006909985587948
2084
+ }
2085
+ ],
2086
+ "bleu": 0.14006909985587948,
2087
+ "commonvoice_hours": 2002.0,
2088
+ "commonvoice_locale": "rw",
2089
+ "population": {
2090
+ "CD": 386764,
2091
+ "RW": 9788548,
2092
+ "UG": 908313
2093
+ }
2094
+ },
2095
+ {
2096
+ "language_name": "Wolof",
2097
+ "bcp_47": "wo",
2098
+ "speakers": 11025494,
2099
+ "scores": [
2100
+ {
2101
+ "model": "meta-llama/llama-3.3-70b-instruct",
2102
+ "bleu": 0.08408628490585719
2103
+ }
2104
+ ],
2105
+ "bleu": 0.08408628490585719,
2106
+ "commonvoice_hours": 0.0,
2107
+ "commonvoice_locale": "wo",
2108
+ "population": {
2109
+ "MR": 10014,
2110
+ "SN": 11015480
2111
+ }
2112
+ },
2113
+ {
2114
+ "language_name": "Tunisian Arabic",
2115
+ "bcp_47": "aeb",
2116
+ "speakers": 10549080,
2117
+ "scores": [
2118
+ {
2119
+ "model": "meta-llama/llama-3.3-70b-instruct",
2120
+ "bleu": 0.23738824104522893
2121
+ }
2122
+ ],
2123
+ "bleu": 0.23738824104522893,
2124
+ "commonvoice_hours": null,
2125
+ "commonvoice_locale": null,
2126
+ "population": {
2127
+ "TN": 10549080
2128
+ }
2129
+ },
2130
+ {
2131
+ "language_name": "Iloko",
2132
+ "bcp_47": "ilo",
2133
+ "speakers": 10481376,
2134
+ "scores": [
2135
+ {
2136
+ "model": "meta-llama/llama-3.3-70b-instruct",
2137
+ "bleu": 0.20685666710862224
2138
+ }
2139
+ ],
2140
+ "bleu": 0.20685666710862224,
2141
+ "commonvoice_hours": null,
2142
+ "commonvoice_locale": null,
2143
+ "population": {
2144
+ "PH": 10481376
2145
+ }
2146
+ },
2147
+ {
2148
+ "language_name": "Xhosa",
2149
+ "bcp_47": "xh",
2150
+ "speakers": 10182944,
2151
+ "scores": [
2152
+ {
2153
+ "model": "meta-llama/llama-3.3-70b-instruct",
2154
+ "bleu": 0.1143143326923908
2155
+ }
2156
+ ],
2157
+ "bleu": 0.1143143326923908,
2158
+ "commonvoice_hours": 0.0,
2159
+ "commonvoice_locale": "xh",
2160
+ "population": {
2161
+ "LS": 19496,
2162
+ "ZA": 10163448
2163
+ }
2164
+ },
2165
+ {
2166
+ "language_name": "Tigrinya",
2167
+ "bcp_47": "ti",
2168
+ "speakers": 10145911,
2169
+ "scores": [
2170
+ {
2171
+ "model": "meta-llama/llama-3.3-70b-instruct",
2172
+ "bleu": 0.08532345270447181
2173
+ }
2174
+ ],
2175
+ "bleu": 0.08532345270447181,
2176
+ "commonvoice_hours": 0.0,
2177
+ "commonvoice_locale": "ti",
2178
+ "population": {
2179
+ "ER": 3648720,
2180
+ "ET": 6486780,
2181
+ "IL": 10411
2182
+ }
2183
+ },
2184
+ {
2185
+ "language_name": "Belarusian",
2186
+ "bcp_47": "be",
2187
+ "speakers": 10064517,
2188
+ "scores": [
2189
+ {
2190
+ "model": "meta-llama/llama-3.3-70b-instruct",
2191
+ "bleu": 0.18341973561481445
2192
+ }
2193
+ ],
2194
+ "bleu": 0.18341973561481445,
2195
+ "commonvoice_hours": 1804.0,
2196
+ "commonvoice_locale": "be",
2197
+ "population": {
2198
+ "BY": 9477920,
2199
+ "PL": 222037,
2200
+ "UA": 364560
2201
+ }
2202
+ },
2203
+ {
2204
+ "language_name": "Luba-Lulua",
2205
+ "bcp_47": "lua",
2206
+ "speakers": 9770880,
2207
+ "scores": [
2208
+ {
2209
+ "model": "meta-llama/llama-3.3-70b-instruct",
2210
+ "bleu": 0.11581604983636683
2211
+ }
2212
+ ],
2213
+ "bleu": 0.11581604983636683,
2214
+ "commonvoice_hours": null,
2215
+ "commonvoice_locale": null,
2216
+ "population": {
2217
+ "CD": 9770880
2218
+ }
2219
+ },
2220
+ {
2221
+ "language_name": "Tajik",
2222
+ "bcp_47": "tg",
2223
+ "speakers": 9644223,
2224
+ "scores": [
2225
+ {
2226
+ "model": "meta-llama/llama-3.3-70b-instruct",
2227
+ "bleu": 0.1846236171638531
2228
+ }
2229
+ ],
2230
+ "bleu": 0.1846236171638531,
2231
+ "commonvoice_hours": 0.0,
2232
+ "commonvoice_locale": "tg",
2233
+ "population": {
2234
+ "PK": 770553,
2235
+ "TJ": 8873670
2236
+ }
2237
+ },
2238
+ {
2239
+ "language_name": "Umbundu",
2240
+ "bcp_47": "umb",
2241
+ "speakers": 9431467,
2242
+ "scores": [
2243
+ {
2244
+ "model": "meta-llama/llama-3.3-70b-instruct",
2245
+ "bleu": 0.05520341910203098
2246
+ }
2247
+ ],
2248
+ "bleu": 0.05520341910203098,
2249
+ "commonvoice_hours": null,
2250
+ "commonvoice_locale": null,
2251
+ "population": {
2252
+ "AO": 9431467
2253
+ }
2254
+ },
2255
+ {
2256
+ "language_name": "Bambara",
2257
+ "bcp_47": "bm",
2258
+ "speakers": 9385632,
2259
+ "scores": [
2260
+ {
2261
+ "model": "meta-llama/llama-3.3-70b-instruct",
2262
+ "bleu": 0.07227674667013836
2263
+ }
2264
+ ],
2265
+ "bleu": 0.07227674667013836,
2266
+ "commonvoice_hours": 0.0,
2267
+ "commonvoice_locale": "bm",
2268
+ "population": {
2269
+ "ML": 9385632
2270
+ }
2271
+ },
2272
+ {
2273
+ "language_name": "Afrikaans",
2274
+ "bcp_47": "af",
2275
+ "speakers": 9318845,
2276
+ "scores": [
2277
+ {
2278
+ "model": "openai/gpt-4o-mini",
2279
+ "bleu": 0.3277177864074156
2280
+ },
2281
+ {
2282
+ "model": "meta-llama/llama-3.3-70b-instruct",
2283
+ "bleu": 0.31538459755111
2284
+ },
2285
+ {
2286
+ "model": "mistralai/mistral-small-24b-instruct-2501",
2287
+ "bleu": 0.26710030799119333
2288
+ },
2289
+ {
2290
+ "model": "google/gemini-2.0-flash-001",
2291
+ "bleu": 0.4361740814378139
2292
+ },
2293
+ {
2294
+ "model": "deepseek/deepseek-chat",
2295
+ "bleu": 0.37149647257024515
2296
+ },
2297
+ {
2298
+ "model": "microsoft/phi-4",
2299
+ "bleu": 0.2883662842075808
2300
+ }
2301
+ ],
2302
+ "bleu": 0.3343732550275598,
2303
+ "commonvoice_hours": 0.5,
2304
+ "commonvoice_locale": "af",
2305
+ "population": {
2306
+ "BW": 6025,
2307
+ "NA": 1972552,
2308
+ "ZA": 7340268
2309
+ }
2310
+ },
2311
+ {
2312
+ "language_name": "Kikuyu",
2313
+ "bcp_47": "ki",
2314
+ "speakers": 9099743,
2315
+ "scores": [
2316
+ {
2317
+ "model": "meta-llama/llama-3.3-70b-instruct",
2318
+ "bleu": 0.0883556207236924
2319
+ }
2320
+ ],
2321
+ "bleu": 0.0883556207236924,
2322
+ "commonvoice_hours": 0.0,
2323
+ "commonvoice_locale": "ki",
2324
+ "population": {
2325
+ "KE": 9099743
2326
+ }
2327
+ },
2328
+ {
2329
+ "language_name": "Haitian Creole",
2330
+ "bcp_47": "ht",
2331
+ "speakers": 8964918,
2332
+ "scores": [
2333
+ {
2334
+ "model": "meta-llama/llama-3.3-70b-instruct",
2335
+ "bleu": 0.1872609836464467
2336
+ }
2337
+ ],
2338
+ "bleu": 0.1872609836464467,
2339
+ "commonvoice_hours": 0.0,
2340
+ "commonvoice_locale": "ht",
2341
+ "population": {
2342
+ "HT": 8964918
2343
+ }
2344
+ },
2345
+ {
2346
+ "language_name": "Catalan",
2347
+ "bcp_47": "ca",
2348
+ "speakers": 8679139,
2349
+ "scores": [
2350
+ {
2351
+ "model": "meta-llama/llama-3.3-70b-instruct",
2352
+ "bleu": 0.29445274007068095
2353
+ }
2354
+ ],
2355
+ "bleu": 0.29445274007068095,
2356
+ "commonvoice_hours": 2842.0,
2357
+ "commonvoice_locale": "ca",
2358
+ "population": {
2359
+ "AD": 39270,
2360
+ "ES": 8502686,
2361
+ "FR": 115342,
2362
+ "IT": 21841
2363
+ }
2364
+ },
2365
+ {
2366
+ "language_name": "Hebrew",
2367
+ "bcp_47": "he",
2368
+ "speakers": 8675480,
2369
+ "scores": [
2370
+ {
2371
+ "model": "meta-llama/llama-3.3-70b-instruct",
2372
+ "bleu": 0.2824719214447976
2373
+ }
2374
+ ],
2375
+ "bleu": 0.2824719214447976,
2376
+ "commonvoice_hours": 1.1,
2377
+ "commonvoice_locale": "he",
2378
+ "population": {
2379
+ "IL": 8675480
2380
+ }
2381
+ },
2382
+ {
2383
+ "language_name": "Mossi",
2384
+ "bcp_47": "mos",
2385
+ "speakers": 8334160,
2386
+ "scores": [
2387
+ {
2388
+ "model": "meta-llama/llama-3.3-70b-instruct",
2389
+ "bleu": 0.08102496244147746
2390
+ }
2391
+ ],
2392
+ "bleu": 0.08102496244147746,
2393
+ "commonvoice_hours": 0.0,
2394
+ "commonvoice_locale": "mos",
2395
+ "population": {
2396
+ "BF": 8334160
2397
+ }
2398
+ },
2399
+ {
2400
+ "language_name": "Kimbundu",
2401
+ "bcp_47": "kmb",
2402
+ "speakers": 8130575,
2403
+ "scores": [
2404
+ {
2405
+ "model": "meta-llama/llama-3.3-70b-instruct",
2406
+ "bleu": 0.07329500673809967
2407
+ }
2408
+ ],
2409
+ "bleu": 0.07329500673809967,
2410
+ "commonvoice_hours": null,
2411
+ "commonvoice_locale": null,
2412
+ "population": {
2413
+ "AO": 8130575
2414
+ }
2415
+ },
2416
+ {
2417
+ "language_name": "Uyghur",
2418
+ "bcp_47": "ug",
2419
+ "speakers": 8052967,
2420
+ "scores": [
2421
+ {
2422
+ "model": "meta-llama/llama-3.3-70b-instruct",
2423
+ "bleu": 0.18397910035998616
2424
+ }
2425
+ ],
2426
+ "bleu": 0.18397910035998616,
2427
+ "commonvoice_hours": 361.0,
2428
+ "commonvoice_locale": "ug",
2429
+ "population": {
2430
+ "AF": 3005,
2431
+ "CN": 7667110,
2432
+ "KZ": 381838,
2433
+ "MN": 1014
2434
+ }
2435
+ },
2436
+ {
2437
+ "language_name": "Minangkabau",
2438
+ "bcp_47": "min",
2439
+ "speakers": 8010780,
2440
+ "scores": [
2441
+ {
2442
+ "model": "meta-llama/llama-3.3-70b-instruct",
2443
+ "bleu": 0.22401269807719826
2444
+ }
2445
+ ],
2446
+ "bleu": 0.22401269807719826,
2447
+ "commonvoice_hours": null,
2448
+ "commonvoice_locale": null,
2449
+ "population": {
2450
+ "ID": 8010780
2451
+ }
2452
+ },
2453
+ {
2454
+ "language_name": "Bulgarian",
2455
+ "bcp_47": "bg",
2456
+ "speakers": 7878315,
2457
+ "scores": [
2458
+ {
2459
+ "model": "meta-llama/llama-3.3-70b-instruct",
2460
+ "bleu": 0.24723779163445408
2461
+ }
2462
+ ],
2463
+ "bleu": 0.24723779163445408,
2464
+ "commonvoice_hours": 16.0,
2465
+ "commonvoice_locale": "bg",
2466
+ "population": {
2467
+ "BG": 6966900,
2468
+ "GR": 28639,
2469
+ "MD": 316263,
2470
+ "RO": 6817,
2471
+ "TR": 344474,
2472
+ "UA": 215222
2473
+ }
2474
+ },
2475
+ {
2476
+ "language_name": "Standard Moroccan Tamazight",
2477
+ "bcp_47": "zgh",
2478
+ "speakers": 7823574,
2479
+ "scores": [
2480
+ {
2481
+ "model": "meta-llama/llama-3.3-70b-instruct",
2482
+ "bleu": 0.0366272802298245
2483
+ }
2484
+ ],
2485
+ "bleu": 0.0366272802298245,
2486
+ "commonvoice_hours": 1.3,
2487
+ "commonvoice_locale": "zgh",
2488
+ "population": {
2489
+ "MA": 7823574
2490
+ }
2491
+ },
2492
+ {
2493
+ "language_name": "Bosnian",
2494
+ "bcp_47": "bs",
2495
+ "speakers": 7594468,
2496
+ "scores": [
2497
+ {
2498
+ "model": "meta-llama/llama-3.3-70b-instruct",
2499
+ "bleu": 0.3051247921441283
2500
+ }
2501
+ ],
2502
+ "bleu": 0.3051247921441283,
2503
+ "commonvoice_hours": 0.0,
2504
+ "commonvoice_locale": "bs",
2505
+ "population": {
2506
+ "BA": 7594468
2507
+ }
2508
+ },
2509
+ {
2510
+ "language_name": "Rundi",
2511
+ "bcp_47": "rn",
2512
+ "speakers": 7475454,
2513
+ "scores": [
2514
+ {
2515
+ "model": "meta-llama/llama-3.3-70b-instruct",
2516
+ "bleu": 0.0957054530624
2517
+ }
2518
+ ],
2519
+ "bleu": 0.0957054530624,
2520
+ "commonvoice_hours": null,
2521
+ "commonvoice_locale": null,
2522
+ "population": {
2523
+ "BI": 7475454
2524
+ }
2525
+ },
2526
+ {
2527
+ "language_name": "Santali",
2528
+ "bcp_47": "sat",
2529
+ "speakers": 7293495,
2530
+ "scores": [
2531
+ {
2532
+ "model": "meta-llama/llama-3.3-70b-instruct",
2533
+ "bleu": 0.11554870024021023
2534
+ }
2535
+ ],
2536
+ "bleu": 0.11554870024021023,
2537
+ "commonvoice_hours": 0.5,
2538
+ "commonvoice_locale": "sat",
2539
+ "population": {
2540
+ "IN": 7293495
2541
+ }
2542
+ },
2543
+ {
2544
+ "language_name": "Danish",
2545
+ "bcp_47": "da",
2546
+ "speakers": 7072056,
2547
+ "scores": [
2548
+ {
2549
+ "model": "meta-llama/llama-3.3-70b-instruct",
2550
+ "bleu": 0.3512857581168584
2551
+ }
2552
+ ],
2553
+ "bleu": 0.3512857581168584,
2554
+ "commonvoice_hours": 13.0,
2555
+ "commonvoice_locale": "da",
2556
+ "population": {
2557
+ "DE": 1603194,
2558
+ "DK": 5458551,
2559
+ "GL": 8066,
2560
+ "IS": 2245
2561
+ }
2562
+ },
2563
+ {
2564
+ "language_name": "Turkmen",
2565
+ "bcp_47": "tk",
2566
+ "speakers": 6870838,
2567
+ "scores": [
2568
+ {
2569
+ "model": "openai/gpt-4o-mini",
2570
+ "bleu": 0.15410064596625964
2571
+ },
2572
+ {
2573
+ "model": "meta-llama/llama-3.3-70b-instruct",
2574
+ "bleu": 0.14820890318014426
2575
+ },
2576
+ {
2577
+ "model": "mistralai/mistral-small-24b-instruct-2501",
2578
+ "bleu": 0.07938993687949465
2579
+ },
2580
+ {
2581
+ "model": "google/gemini-2.0-flash-001",
2582
+ "bleu": 0.2761834512123037
2583
+ },
2584
+ {
2585
+ "model": "deepseek/deepseek-chat",
2586
+ "bleu": 0.1718597543270264
2587
+ },
2588
+ {
2589
+ "model": "microsoft/phi-4",
2590
+ "bleu": 0.10118596975980092
2591
+ }
2592
+ ],
2593
+ "bleu": 0.15515477688750492,
2594
+ "commonvoice_hours": 2.8,
2595
+ "commonvoice_locale": "tk",
2596
+ "population": {
2597
+ "AF": 622945,
2598
+ "IR": 2377852,
2599
+ "TM": 3870041
2600
+ }
2601
+ },
2602
+ {
2603
+ "language_name": "Kurdish",
2604
+ "bcp_47": "ku",
2605
+ "speakers": 6866757,
2606
+ "scores": [
2607
+ {
2608
+ "model": "openai/gpt-4o-mini",
2609
+ "bleu": 0.14440915289810186
2610
+ },
2611
+ {
2612
+ "model": "meta-llama/llama-3.3-70b-instruct",
2613
+ "bleu": 0.15987085387022903
2614
+ },
2615
+ {
2616
+ "model": "mistralai/mistral-small-24b-instruct-2501",
2617
+ "bleu": 0.10987778830152085
2618
+ },
2619
+ {
2620
+ "model": "google/gemini-2.0-flash-001",
2621
+ "bleu": 0.28985769410441137
2622
+ },
2623
+ {
2624
+ "model": "deepseek/deepseek-chat",
2625
+ "bleu": 0.1865343501300658
2626
+ },
2627
+ {
2628
+ "model": "microsoft/phi-4",
2629
+ "bleu": 0.10000019378200214
2630
+ }
2631
+ ],
2632
+ "bleu": 0.16509167218105517,
2633
+ "commonvoice_hours": 69.0,
2634
+ "commonvoice_locale": "kmr",
2635
+ "population": {
2636
+ "AM": 99704,
2637
+ "AZ": 24494,
2638
+ "DE": 529054,
2639
+ "GE": 35573,
2640
+ "LB": 92983,
2641
+ "SY": 1551872,
2642
+ "TM": 22115,
2643
+ "TR": 4510962
2644
+ }
2645
+ },
2646
+ {
2647
+ "language_name": "Croatian",
2648
+ "bcp_47": "hr",
2649
+ "speakers": 6813164,
2650
+ "scores": [
2651
+ {
2652
+ "model": "meta-llama/llama-3.3-70b-instruct",
2653
+ "bleu": 0.2290484937313612
2654
+ }
2655
+ ],
2656
+ "bleu": 0.2290484937313612,
2657
+ "commonvoice_hours": 0.0,
2658
+ "commonvoice_locale": "hr",
2659
+ "population": {
2660
+ "AT": 106313,
2661
+ "BA": 460271,
2662
+ "CA": 45233,
2663
+ "DE": 633262,
2664
+ "HR": 4185472,
2665
+ "HU": 31270,
2666
+ "IT": 3495,
2667
+ "RS": 65213,
2668
+ "SI": 1282635
2669
+ }
2670
+ },
2671
+ {
2672
+ "language_name": "Albanian",
2673
+ "bcp_47": "sq",
2674
+ "speakers": 6791906,
2675
+ "scores": [
2676
+ {
2677
+ "model": "meta-llama/llama-3.3-70b-instruct",
2678
+ "bleu": 0.26490711574268994
2679
+ }
2680
+ ],
2681
+ "bleu": 0.26490711574268994,
2682
+ "commonvoice_hours": 8.8,
2683
+ "commonvoice_locale": "sq",
2684
+ "population": {
2685
+ "AL": 3074580,
2686
+ "GR": 9971,
2687
+ "ME": 48179,
2688
+ "MK": 531492,
2689
+ "RS": 1332312,
2690
+ "TR": 17224,
2691
+ "XK": 1778148
2692
+ }
2693
+ },
2694
+ {
2695
+ "language_name": "Slovak",
2696
+ "bcp_47": "sk",
2697
+ "speakers": 6680269,
2698
+ "scores": [
2699
+ {
2700
+ "model": "meta-llama/llama-3.3-70b-instruct",
2701
+ "bleu": 0.2826836020834733
2702
+ }
2703
+ ],
2704
+ "bleu": 0.2826836020834733,
2705
+ "commonvoice_hours": 39.0,
2706
+ "commonvoice_locale": "sk",
2707
+ "population": {
2708
+ "CZ": 1712400,
2709
+ "HU": 11726,
2710
+ "RS": 59603,
2711
+ "SK": 4896540
2712
+ }
2713
+ },
2714
+ {
2715
+ "language_name": "Dyula",
2716
+ "bcp_47": "dyu",
2717
+ "speakers": 6667328,
2718
+ "scores": [
2719
+ {
2720
+ "model": "meta-llama/llama-3.3-70b-instruct",
2721
+ "bleu": 0.0633017924291756
2722
+ }
2723
+ ],
2724
+ "bleu": 0.0633017924291756,
2725
+ "commonvoice_hours": 0.3,
2726
+ "commonvoice_locale": "dyu",
2727
+ "population": {
2728
+ "BF": 6667328
2729
+ }
2730
+ },
2731
+ {
2732
+ "language_name": "Mongolian",
2733
+ "bcp_47": "mn",
2734
+ "speakers": 6572846,
2735
+ "scores": [
2736
+ {
2737
+ "model": "meta-llama/llama-3.3-70b-instruct",
2738
+ "bleu": 0.1950781841033538
2739
+ }
2740
+ ],
2741
+ "bleu": 0.1950781841033538,
2742
+ "commonvoice_hours": 46.0,
2743
+ "commonvoice_locale": "mn",
2744
+ "population": {
2745
+ "CN": 3624452,
2746
+ "MN": 2946268,
2747
+ "RU": 2126
2748
+ }
2749
+ },
2750
+ {
2751
+ "language_name": "Southern Sotho",
2752
+ "bcp_47": "st",
2753
+ "speakers": 6390567,
2754
+ "scores": [
2755
+ {
2756
+ "model": "meta-llama/llama-3.3-70b-instruct",
2757
+ "bleu": 0.12381413258013083
2758
+ }
2759
+ ],
2760
+ "bleu": 0.12381413258013083,
2761
+ "commonvoice_hours": 0.0,
2762
+ "commonvoice_locale": "st",
2763
+ "population": {
2764
+ "LS": 1929943,
2765
+ "ZA": 4460624
2766
+ }
2767
+ },
2768
+ {
2769
+ "language_name": "Tswana",
2770
+ "bcp_47": "tn",
2771
+ "speakers": 6113428,
2772
+ "scores": [
2773
+ {
2774
+ "model": "meta-llama/llama-3.3-70b-instruct",
2775
+ "bleu": 0.09139131060492443
2776
+ }
2777
+ ],
2778
+ "bleu": 0.09139131060492443,
2779
+ "commonvoice_hours": 4.2,
2780
+ "commonvoice_locale": "tn",
2781
+ "population": {
2782
+ "BW": 1436683,
2783
+ "NA": 14728,
2784
+ "ZA": 4630015,
2785
+ "ZW": 32002
2786
+ }
2787
+ },
2788
+ {
2789
+ "language_name": "Guarani",
2790
+ "bcp_47": "gn",
2791
+ "speakers": 5827107,
2792
+ "scores": [
2793
+ {
2794
+ "model": "meta-llama/llama-3.3-70b-instruct",
2795
+ "bleu": 0.12296923497272805
2796
+ }
2797
+ ],
2798
+ "bleu": 0.12296923497272805,
2799
+ "commonvoice_hours": 3.7,
2800
+ "commonvoice_locale": "gn",
2801
+ "population": {
2802
+ "AR": 21375,
2803
+ "BO": 52380,
2804
+ "PY": 5753352
2805
+ }
2806
+ },
2807
+ {
2808
+ "language_name": "Finnish",
2809
+ "bcp_47": "fi",
2810
+ "speakers": 5736842,
2811
+ "scores": [
2812
+ {
2813
+ "model": "meta-llama/llama-3.3-70b-instruct",
2814
+ "bleu": 0.2306868672081301
2815
+ }
2816
+ ],
2817
+ "bleu": 0.2306868672081301,
2818
+ "commonvoice_hours": 15.0,
2819
+ "commonvoice_locale": "fi",
2820
+ "population": {
2821
+ "EE": 258010,
2822
+ "FI": 5237370,
2823
+ "RU": 17007,
2824
+ "SE": 224455
2825
+ }
2826
+ },
2827
+ {
2828
+ "language_name": "Ganda",
2829
+ "bcp_47": "lg",
2830
+ "speakers": 5622890,
2831
+ "scores": [
2832
+ {
2833
+ "model": "meta-llama/llama-3.3-70b-instruct",
2834
+ "bleu": 0.09865217050437662
2835
+ }
2836
+ ],
2837
+ "bleu": 0.09865217050437662,
2838
+ "commonvoice_hours": 437.0,
2839
+ "commonvoice_locale": "lg",
2840
+ "population": {
2841
+ "UG": 5622890
2842
+ }
2843
+ },
2844
+ {
2845
+ "language_name": "Kashmiri",
2846
+ "bcp_47": "ks",
2847
+ "speakers": 5598085,
2848
+ "scores": [
2849
+ {
2850
+ "model": "meta-llama/llama-3.3-70b-instruct",
2851
+ "bleu": 0.1344939664526747
2852
+ }
2853
+ ],
2854
+ "bleu": 0.1344939664526747,
2855
+ "commonvoice_hours": null,
2856
+ "commonvoice_locale": null,
2857
+ "population": {
2858
+ "IN": 5436969,
2859
+ "PK": 161116
2860
+ }
2861
+ },
2862
+ {
2863
+ "language_name": "Norwegian Bokmål",
2864
+ "bcp_47": "nb",
2865
+ "speakers": 5468932,
2866
+ "scores": [
2867
+ {
2868
+ "model": "meta-llama/llama-3.3-70b-instruct",
2869
+ "bleu": 0.3568538739752233
2870
+ }
2871
+ ],
2872
+ "bleu": 0.3568538739752233,
2873
+ "commonvoice_hours": 0.1,
2874
+ "commonvoice_locale": "nb-NO",
2875
+ "population": {
2876
+ "NO": 5467440,
2877
+ "SJ": 1492
2878
+ }
2879
+ },
2880
+ {
2881
+ "language_name": "Bemba",
2882
+ "bcp_47": "bem",
2883
+ "speakers": 5402246,
2884
+ "scores": [
2885
+ {
2886
+ "model": "openai/gpt-4o-mini",
2887
+ "bleu": 0.07496563614353445
2888
+ },
2889
+ {
2890
+ "model": "meta-llama/llama-3.3-70b-instruct",
2891
+ "bleu": 0.10425825663987873
2892
+ },
2893
+ {
2894
+ "model": "mistralai/mistral-small-24b-instruct-2501",
2895
+ "bleu": 0.048552315311727906
2896
+ },
2897
+ {
2898
+ "model": "google/gemini-2.0-flash-001",
2899
+ "bleu": 0.20820762589055672
2900
+ },
2901
+ {
2902
+ "model": "deepseek/deepseek-chat",
2903
+ "bleu": 0.13037194233770932
2904
+ },
2905
+ {
2906
+ "model": "microsoft/phi-4",
2907
+ "bleu": 0.07919429950933718
2908
+ }
2909
+ ],
2910
+ "bleu": 0.10759167930545738,
2911
+ "commonvoice_hours": null,
2912
+ "commonvoice_locale": null,
2913
+ "population": {
2914
+ "ZM": 5402246
2915
+ }
2916
+ },
2917
+ {
2918
+ "language_name": "Armenian",
2919
+ "bcp_47": "hy",
2920
+ "speakers": 5317273,
2921
+ "scores": [
2922
+ {
2923
+ "model": "meta-llama/llama-3.3-70b-instruct",
2924
+ "bleu": 0.2525113198548088
2925
+ }
2926
+ ],
2927
+ "bleu": 0.2525113198548088,
2928
+ "commonvoice_hours": 31.0,
2929
+ "commonvoice_locale": "hy-AM",
2930
+ "population": {
2931
+ "AM": 2960894,
2932
+ "CY": 2787,
2933
+ "GE": 279790,
2934
+ "IR": 203816,
2935
+ "LB": 284420,
2936
+ "RU": 1190465,
2937
+ "SY": 349171,
2938
+ "TR": 45930
2939
+ }
2940
+ },
2941
+ {
2942
+ "language_name": "Northern Sotho",
2943
+ "bcp_47": "nso",
2944
+ "speakers": 5307578,
2945
+ "scores": [
2946
+ {
2947
+ "model": "openai/gpt-4o-mini",
2948
+ "bleu": 0.1287306186367617
2949
+ },
2950
+ {
2951
+ "model": "meta-llama/llama-3.3-70b-instruct",
2952
+ "bleu": 0.11431860079235977
2953
+ },
2954
+ {
2955
+ "model": "mistralai/mistral-small-24b-instruct-2501",
2956
+ "bleu": 0.048032427671766596
2957
+ },
2958
+ {
2959
+ "model": "google/gemini-2.0-flash-001",
2960
+ "bleu": 0.277532484522071
2961
+ },
2962
+ {
2963
+ "model": "deepseek/deepseek-chat",
2964
+ "bleu": 0.1559013863573944
2965
+ },
2966
+ {
2967
+ "model": "microsoft/phi-4",
2968
+ "bleu": 0.08683694629684643
2969
+ }
2970
+ ],
2971
+ "bleu": 0.13522541071286664,
2972
+ "commonvoice_hours": 0.0,
2973
+ "commonvoice_locale": "nso",
2974
+ "population": {
2975
+ "ZA": 5307578
2976
+ }
2977
+ },
2978
+ {
2979
+ "language_name": "Luo (Kenya and Tanzania)",
2980
+ "bcp_47": "luo",
2981
+ "speakers": 5245734,
2982
+ "scores": [
2983
+ {
2984
+ "model": "meta-llama/llama-3.3-70b-instruct",
2985
+ "bleu": 0.07123028733548639
2986
+ }
2987
+ ],
2988
+ "bleu": 0.07123028733548639,
2989
+ "commonvoice_hours": 30.0,
2990
+ "commonvoice_locale": "luo",
2991
+ "population": {
2992
+ "KE": 5245734
2993
+ }
2994
+ },
2995
+ {
2996
+ "language_name": "Tok Pisin",
2997
+ "bcp_47": "tpi",
2998
+ "speakers": 5154217,
2999
+ "scores": [
3000
+ {
3001
+ "model": "meta-llama/llama-3.3-70b-instruct",
3002
+ "bleu": 0.17665711931817996
3003
+ }
3004
+ ],
3005
+ "bleu": 0.17665711931817996,
3006
+ "commonvoice_hours": null,
3007
+ "commonvoice_locale": null,
3008
+ "population": {
3009
+ "PG": 5154217
3010
+ }
3011
+ },
3012
+ {
3013
+ "language_name": "Lao",
3014
+ "bcp_47": "lo",
3015
+ "speakers": 5138706,
3016
+ "scores": [
3017
+ {
3018
+ "model": "meta-llama/llama-3.3-70b-instruct",
3019
+ "bleu": 0.17291556794348653
3020
+ }
3021
+ ],
3022
+ "bleu": 0.17291556794348653,
3023
+ "commonvoice_hours": 0.2,
3024
+ "commonvoice_locale": "lo",
3025
+ "population": {
3026
+ "LA": 5138706
3027
+ }
3028
+ },
3029
+ {
3030
+ "language_name": "Tsonga",
3031
+ "bcp_47": "ts",
3032
+ "speakers": 4880932,
3033
+ "scores": [
3034
+ {
3035
+ "model": "openai/gpt-4o-mini",
3036
+ "bleu": 0.1264498146181144
3037
+ },
3038
+ {
3039
+ "model": "meta-llama/llama-3.3-70b-instruct",
3040
+ "bleu": 0.09614725376527729
3041
+ },
3042
+ {
3043
+ "model": "mistralai/mistral-small-24b-instruct-2501",
3044
+ "bleu": 0.043920591728788254
3045
+ },
3046
+ {
3047
+ "model": "google/gemini-2.0-flash-001",
3048
+ "bleu": 0.2843690426617385
3049
+ },
3050
+ {
3051
+ "model": "deepseek/deepseek-chat",
3052
+ "bleu": 0.10072669531344912
3053
+ },
3054
+ {
3055
+ "model": "microsoft/phi-4",
3056
+ "bleu": 0.0708900783780892
3057
+ }
3058
+ ],
3059
+ "bleu": 0.12041724607757613,
3060
+ "commonvoice_hours": 0.0,
3061
+ "commonvoice_locale": "ts",
3062
+ "population": {
3063
+ "MZ": 2377758,
3064
+ "SZ": 18776,
3065
+ "ZA": 2484398
3066
+ }
3067
+ },
3068
+ {
3069
+ "language_name": "Balinese",
3070
+ "bcp_47": "ban",
3071
+ "speakers": 4806468,
3072
+ "scores": [
3073
+ {
3074
+ "model": "meta-llama/llama-3.3-70b-instruct",
3075
+ "bleu": 0.20937766416587725
3076
+ }
3077
+ ],
3078
+ "bleu": 0.20937766416587725,
3079
+ "commonvoice_hours": null,
3080
+ "commonvoice_locale": null,
3081
+ "population": {
3082
+ "ID": 4806468
3083
+ }
3084
+ },
3085
+ {
3086
+ "language_name": "Ewe",
3087
+ "bcp_47": "ee",
3088
+ "speakers": 4690857,
3089
+ "scores": [
3090
+ {
3091
+ "model": "meta-llama/llama-3.3-70b-instruct",
3092
+ "bleu": 0.06328122760447334
3093
+ }
3094
+ ],
3095
+ "bleu": 0.06328122760447334,
3096
+ "commonvoice_hours": 0.0,
3097
+ "commonvoice_locale": "ee",
3098
+ "population": {
3099
+ "GH": 3227422,
3100
+ "TG": 1463435
3101
+ }
3102
+ },
3103
+ {
3104
+ "language_name": "Buginese",
3105
+ "bcp_47": "bug",
3106
+ "speakers": 4298211,
3107
+ "scores": [
3108
+ {
3109
+ "model": "meta-llama/llama-3.3-70b-instruct",
3110
+ "bleu": 0.11888625287150432
3111
+ }
3112
+ ],
3113
+ "bleu": 0.11888625287150432,
3114
+ "commonvoice_hours": null,
3115
+ "commonvoice_locale": null,
3116
+ "population": {
3117
+ "ID": 4272416,
3118
+ "MY": 25795
3119
+ }
3120
+ },
3121
+ {
3122
+ "language_name": "Goan Konkani",
3123
+ "bcp_47": "gom",
3124
+ "speakers": 4243488,
3125
+ "scores": [
3126
+ {
3127
+ "model": "meta-llama/llama-3.3-70b-instruct",
3128
+ "bleu": 0.17517997036818814
3129
+ }
3130
+ ],
3131
+ "bleu": 0.17517997036818814,
3132
+ "commonvoice_hours": 0.0,
3133
+ "commonvoice_locale": "gom",
3134
+ "population": {
3135
+ "IN": 4243488
3136
+ }
3137
+ },
3138
+ {
3139
+ "language_name": "Kamba",
3140
+ "bcp_47": "kam",
3141
+ "speakers": 4068120,
3142
+ "scores": [
3143
+ {
3144
+ "model": "meta-llama/llama-3.3-70b-instruct",
3145
+ "bleu": 0.09766297423802607
3146
+ }
3147
+ ],
3148
+ "bleu": 0.09766297423802607,
3149
+ "commonvoice_hours": null,
3150
+ "commonvoice_locale": null,
3151
+ "population": {
3152
+ "KE": 4068120
3153
+ }
3154
+ },
3155
+ {
3156
+ "language_name": "Banjar",
3157
+ "bcp_47": "bjn",
3158
+ "speakers": 4010288,
3159
+ "scores": [
3160
+ {
3161
+ "model": "meta-llama/llama-3.3-70b-instruct",
3162
+ "bleu": 0.21429523594040997
3163
+ }
3164
+ ],
3165
+ "bleu": 0.21429523594040997,
3166
+ "commonvoice_hours": null,
3167
+ "commonvoice_locale": null,
3168
+ "population": {
3169
+ "ID": 4005390,
3170
+ "MY": 4898
3171
+ }
3172
+ },
3173
+ {
3174
+ "language_name": "Lombard",
3175
+ "bcp_47": "lmo",
3176
+ "speakers": 3901518,
3177
+ "scores": [
3178
+ {
3179
+ "model": "openai/gpt-4o-mini",
3180
+ "bleu": 0.19986098660959015
3181
+ },
3182
+ {
3183
+ "model": "meta-llama/llama-3.3-70b-instruct",
3184
+ "bleu": 0.21159778572935684
3185
+ },
3186
+ {
3187
+ "model": "mistralai/mistral-small-24b-instruct-2501",
3188
+ "bleu": 0.1588300738463149
3189
+ },
3190
+ {
3191
+ "model": "google/gemini-2.0-flash-001",
3192
+ "bleu": 0.30563834118855027
3193
+ },
3194
+ {
3195
+ "model": "deepseek/deepseek-chat",
3196
+ "bleu": 0.22666325208418955
3197
+ },
3198
+ {
3199
+ "model": "microsoft/phi-4",
3200
+ "bleu": 0.18882590620933629
3201
+ }
3202
+ ],
3203
+ "bleu": 0.215236057611223,
3204
+ "commonvoice_hours": null,
3205
+ "commonvoice_locale": null,
3206
+ "population": {
3207
+ "CH": 344564,
3208
+ "IT": 3556954
3209
+ }
3210
+ },
3211
+ {
3212
+ "language_name": "Achinese",
3213
+ "bcp_47": "ace",
3214
+ "speakers": 3738364,
3215
+ "scores": [
3216
+ {
3217
+ "model": "meta-llama/llama-3.3-70b-instruct",
3218
+ "bleu": 0.16911632683538352
3219
+ }
3220
+ ],
3221
+ "bleu": 0.16911632683538352,
3222
+ "commonvoice_hours": 0.0,
3223
+ "commonvoice_locale": "ace",
3224
+ "population": {
3225
+ "ID": 3738364
3226
+ }
3227
+ },
3228
+ {
3229
+ "language_name": "Shan",
3230
+ "bcp_47": "shn",
3231
+ "speakers": 3687984,
3232
+ "scores": [
3233
+ {
3234
+ "model": "openai/gpt-4o-mini",
3235
+ "bleu": 0.03567194702202585
3236
+ },
3237
+ {
3238
+ "model": "meta-llama/llama-3.3-70b-instruct",
3239
+ "bleu": 0.14589621017705648
3240
+ },
3241
+ {
3242
+ "model": "mistralai/mistral-small-24b-instruct-2501",
3243
+ "bleu": 0.011114664716630177
3244
+ },
3245
+ {
3246
+ "model": "google/gemini-2.0-flash-001",
3247
+ "bleu": 0.24688742301342204
3248
+ },
3249
+ {
3250
+ "model": "deepseek/deepseek-chat",
3251
+ "bleu": 0.11534595629433392
3252
+ },
3253
+ {
3254
+ "model": "microsoft/phi-4",
3255
+ "bleu": 0.06564720827517354
3256
+ }
3257
+ ],
3258
+ "bleu": 0.10342723491644035,
3259
+ "commonvoice_hours": 0.0,
3260
+ "commonvoice_locale": "shn",
3261
+ "population": {
3262
+ "MM": 3621766,
3263
+ "TH": 66218
3264
+ }
3265
+ },
3266
+ {
3267
+ "language_name": "Georgian",
3268
+ "bcp_47": "ka",
3269
+ "speakers": 3543646,
3270
+ "scores": [
3271
+ {
3272
+ "model": "meta-llama/llama-3.3-70b-instruct",
3273
+ "bleu": 0.22489436376782782
3274
+ }
3275
+ ],
3276
+ "bleu": 0.22489436376782782,
3277
+ "commonvoice_hours": 158.0,
3278
+ "commonvoice_locale": "ka",
3279
+ "population": {
3280
+ "GE": 3437420,
3281
+ "IR": 60296,
3282
+ "TR": 45930
3283
+ }
3284
+ },
3285
+ {
3286
+ "language_name": "Galician",
3287
+ "bcp_47": "gl",
3288
+ "speakers": 3515530,
3289
+ "scores": [
3290
+ {
3291
+ "model": "meta-llama/llama-3.3-70b-instruct",
3292
+ "bleu": 0.2463790593991139
3293
+ }
3294
+ ],
3295
+ "bleu": 0.2463790593991139,
3296
+ "commonvoice_hours": 109.0,
3297
+ "commonvoice_locale": "gl",
3298
+ "population": {
3299
+ "ES": 3501106,
3300
+ "PT": 14424
3301
+ }
3302
+ },
3303
+ {
3304
+ "language_name": "Lingala",
3305
+ "bcp_47": "ln",
3306
+ "speakers": 3514491,
3307
+ "scores": [
3308
+ {
3309
+ "model": "meta-llama/llama-3.3-70b-instruct",
3310
+ "bleu": 0.10115588577551943
3311
+ }
3312
+ ],
3313
+ "bleu": 0.10115588577551943,
3314
+ "commonvoice_hours": 0.0,
3315
+ "commonvoice_locale": "ln",
3316
+ "population": {
3317
+ "AO": 217899,
3318
+ "CD": 3155180,
3319
+ "CF": 14378,
3320
+ "CG": 127034
3321
+ }
3322
+ },
3323
+ {
3324
+ "language_name": "Kabyle",
3325
+ "bcp_47": "kab",
3326
+ "speakers": 3351886,
3327
+ "scores": [
3328
+ {
3329
+ "model": "meta-llama/llama-3.3-70b-instruct",
3330
+ "bleu": 0.03368838568519845
3331
+ }
3332
+ ],
3333
+ "bleu": 0.03368838568519845,
3334
+ "commonvoice_hours": 571.0,
3335
+ "commonvoice_locale": "kab",
3336
+ "population": {
3337
+ "DZ": 3351886
3338
+ }
3339
+ },
3340
+ {
3341
+ "language_name": "Kyrgyz",
3342
+ "bcp_47": "ky",
3343
+ "speakers": 3338267,
3344
+ "scores": [
3345
+ {
3346
+ "model": "meta-llama/llama-3.3-70b-instruct",
3347
+ "bleu": 0.21002212869070494
3348
+ }
3349
+ ],
3350
+ "bleu": 0.21002212869070494,
3351
+ "commonvoice_hours": 39.0,
3352
+ "commonvoice_locale": "ky",
3353
+ "population": {
3354
+ "CN": 473967,
3355
+ "KG": 2863152,
3356
+ "TR": 1148
3357
+ }
3358
+ },
3359
+ {
3360
+ "language_name": "Fon",
3361
+ "bcp_47": "fon",
3362
+ "speakers": 3216150,
3363
+ "scores": [
3364
+ {
3365
+ "model": "meta-llama/llama-3.3-70b-instruct",
3366
+ "bleu": 0.0832678269022026
3367
+ }
3368
+ ],
3369
+ "bleu": 0.0832678269022026,
3370
+ "commonvoice_hours": null,
3371
+ "commonvoice_locale": null,
3372
+ "population": {
3373
+ "BJ": 3216150
3374
+ }
3375
+ },
3376
+ {
3377
+ "language_name": "Waray",
3378
+ "bcp_47": "war",
3379
+ "speakers": 3166927,
3380
+ "scores": [
3381
+ {
3382
+ "model": "meta-llama/llama-3.3-70b-instruct",
3383
+ "bleu": 0.2156899984074879
3384
+ }
3385
+ ],
3386
+ "bleu": 0.2156899984074879,
3387
+ "commonvoice_hours": null,
3388
+ "commonvoice_locale": null,
3389
+ "population": {
3390
+ "CA": 678,
3391
+ "PH": 3166249
3392
+ }
3393
+ },
3394
+ {
3395
+ "language_name": "Tibetan",
3396
+ "bcp_47": "bo",
3397
+ "speakers": 3006697,
3398
+ "scores": [
3399
+ {
3400
+ "model": "meta-llama/llama-3.3-70b-instruct",
3401
+ "bleu": 0.11883469874802492
3402
+ }
3403
+ ],
3404
+ "bleu": 0.11883469874802492,
3405
+ "commonvoice_hours": 0.0,
3406
+ "commonvoice_locale": "bo",
3407
+ "population": {
3408
+ "CN": 2788040,
3409
+ "IN": 145870,
3410
+ "NP": 72787
3411
+ }
3412
+ },
3413
+ {
3414
+ "language_name": "Sango",
3415
+ "bcp_47": "sg",
3416
+ "speakers": 2935521,
3417
+ "scores": [
3418
+ {
3419
+ "model": "meta-llama/llama-3.3-70b-instruct",
3420
+ "bleu": 0.052708457503892185
3421
+ }
3422
+ ],
3423
+ "bleu": 0.052708457503892185,
3424
+ "commonvoice_hours": null,
3425
+ "commonvoice_locale": null,
3426
+ "population": {
3427
+ "CF": 2935521
3428
+ }
3429
+ },
3430
+ {
3431
+ "language_name": "Aymara",
3432
+ "bcp_47": "ay",
3433
+ "speakers": 2838620,
3434
+ "scores": [
3435
+ {
3436
+ "model": "meta-llama/llama-3.3-70b-instruct",
3437
+ "bleu": 0.07563412710891973
3438
+ }
3439
+ ],
3440
+ "bleu": 0.07563412710891973,
3441
+ "commonvoice_hours": null,
3442
+ "commonvoice_locale": null,
3443
+ "population": {
3444
+ "BO": 2327980,
3445
+ "PE": 510640
3446
+ }
3447
+ },
3448
+ {
3449
+ "language_name": "Dogri",
3450
+ "bcp_47": "doi",
3451
+ "speakers": 2652180,
3452
+ "scores": [
3453
+ {
3454
+ "model": "meta-llama/llama-3.3-70b-instruct",
3455
+ "bleu": 0.18698274115592
3456
+ }
3457
+ ],
3458
+ "bleu": 0.18698274115592,
3459
+ "commonvoice_hours": null,
3460
+ "commonvoice_locale": null,
3461
+ "population": {
3462
+ "IN": 2652180
3463
+ }
3464
+ },
3465
+ {
3466
+ "language_name": "Lithuanian",
3467
+ "bcp_47": "lt",
3468
+ "speakers": 2488617,
3469
+ "scores": [
3470
+ {
3471
+ "model": "meta-llama/llama-3.3-70b-instruct",
3472
+ "bleu": 0.23629191535308328
3473
+ }
3474
+ ],
3475
+ "bleu": 0.23629191535308328,
3476
+ "commonvoice_hours": 25.0,
3477
+ "commonvoice_locale": "lt",
3478
+ "population": {
3479
+ "GB": 131522,
3480
+ "LT": 2349056,
3481
+ "PL": 8039
3482
+ }
3483
+ },
3484
+ {
3485
+ "language_name": "Swati",
3486
+ "bcp_47": "ss",
3487
+ "speakers": 2212379,
3488
+ "scores": [
3489
+ {
3490
+ "model": "meta-llama/llama-3.3-70b-instruct",
3491
+ "bleu": 0.10571792263190831
3492
+ }
3493
+ ],
3494
+ "bleu": 0.10571792263190831,
3495
+ "commonvoice_hours": 0.0,
3496
+ "commonvoice_locale": "ss",
3497
+ "population": {
3498
+ "LS": 47264,
3499
+ "SZ": 640598,
3500
+ "ZA": 1524517
3501
+ }
3502
+ },
3503
+ {
3504
+ "language_name": "Occitan",
3505
+ "bcp_47": "oc",
3506
+ "speakers": 2040398,
3507
+ "scores": [
3508
+ {
3509
+ "model": "meta-llama/llama-3.3-70b-instruct",
3510
+ "bleu": 0.3116700967049491
3511
+ }
3512
+ ],
3513
+ "bleu": 0.3116700967049491,
3514
+ "commonvoice_hours": 1.8,
3515
+ "commonvoice_locale": "oc",
3516
+ "population": {
3517
+ "ES": 4952,
3518
+ "FR": 2035446
3519
+ }
3520
+ },
3521
+ {
3522
+ "language_name": "Tatar",
3523
+ "bcp_47": "tt",
3524
+ "speakers": 1984108,
3525
+ "scores": [
3526
+ {
3527
+ "model": "meta-llama/llama-3.3-70b-instruct",
3528
+ "bleu": 0.20199966692246552
3529
+ }
3530
+ ],
3531
+ "bleu": 0.20199966692246552,
3532
+ "commonvoice_hours": 32.0,
3533
+ "commonvoice_locale": "tt",
3534
+ "population": {
3535
+ "RU": 1984108
3536
+ }
3537
+ },
3538
+ {
3539
+ "language_name": "Slovenian",
3540
+ "bcp_47": "sl",
3541
+ "speakers": 1973181,
3542
+ "scores": [
3543
+ {
3544
+ "model": "meta-llama/llama-3.3-70b-instruct",
3545
+ "bleu": 0.25710751649810404
3546
+ }
3547
+ ],
3548
+ "bleu": 0.25710751649810404,
3549
+ "commonvoice_hours": 17.0,
3550
+ "commonvoice_locale": "sl",
3551
+ "population": {
3552
+ "AT": 32780,
3553
+ "HU": 4984,
3554
+ "IT": 106085,
3555
+ "SI": 1829332
3556
+ }
3557
+ },
3558
+ {
3559
+ "language_name": "Bodo",
3560
+ "bcp_47": "brx",
3561
+ "speakers": 1856526,
3562
+ "scores": [
3563
+ {
3564
+ "model": "meta-llama/llama-3.3-70b-instruct",
3565
+ "bleu": 0.07193315161893905
3566
+ }
3567
+ ],
3568
+ "bleu": 0.07193315161893905,
3569
+ "commonvoice_hours": null,
3570
+ "commonvoice_locale": null,
3571
+ "population": {
3572
+ "IN": 1856526
3573
+ }
3574
+ },
3575
+ {
3576
+ "language_name": "Chuvash",
3577
+ "bcp_47": "cv",
3578
+ "speakers": 1842386,
3579
+ "scores": [
3580
+ {
3581
+ "model": "meta-llama/llama-3.3-70b-instruct",
3582
+ "bleu": 0.16319209573807847
3583
+ }
3584
+ ],
3585
+ "bleu": 0.16319209573807847,
3586
+ "commonvoice_hours": 27.0,
3587
+ "commonvoice_locale": "cv",
3588
+ "population": {
3589
+ "RU": 1842386
3590
+ }
3591
+ },
3592
+ {
3593
+ "language_name": "Bashkir",
3594
+ "bcp_47": "ba",
3595
+ "speakers": 1842386,
3596
+ "scores": [
3597
+ {
3598
+ "model": "meta-llama/llama-3.3-70b-instruct",
3599
+ "bleu": 0.23494956875272427
3600
+ }
3601
+ ],
3602
+ "bleu": 0.23494956875272427,
3603
+ "commonvoice_hours": 259.0,
3604
+ "commonvoice_locale": "ba",
3605
+ "population": {
3606
+ "RU": 1842386
3607
+ }
3608
+ },
3609
+ {
3610
+ "language_name": "Tumbuka",
3611
+ "bcp_47": "tum",
3612
+ "speakers": 1780514,
3613
+ "scores": [
3614
+ {
3615
+ "model": "openai/gpt-4o-mini",
3616
+ "bleu": 0.09211959148198216
3617
+ },
3618
+ {
3619
+ "model": "meta-llama/llama-3.3-70b-instruct",
3620
+ "bleu": 0.08953119623294435
3621
+ },
3622
+ {
3623
+ "model": "mistralai/mistral-small-24b-instruct-2501",
3624
+ "bleu": 0.036353192983993324
3625
+ },
3626
+ {
3627
+ "model": "google/gemini-2.0-flash-001",
3628
+ "bleu": 0.21852974820220555
3629
+ },
3630
+ {
3631
+ "model": "deepseek/deepseek-chat",
3632
+ "bleu": 0.13308678184347988
3633
+ },
3634
+ {
3635
+ "model": "microsoft/phi-4",
3636
+ "bleu": 0.06252197708878435
3637
+ }
3638
+ ],
3639
+ "bleu": 0.10535708130556494,
3640
+ "commonvoice_hours": null,
3641
+ "commonvoice_locale": null,
3642
+ "population": {
3643
+ "MW": 1780514
3644
+ }
3645
+ },
3646
+ {
3647
+ "language_name": "Macedonian",
3648
+ "bcp_47": "mk",
3649
+ "speakers": 1608565,
3650
+ "scores": [
3651
+ {
3652
+ "model": "meta-llama/llama-3.3-70b-instruct",
3653
+ "bleu": 0.2635416107541368
3654
+ }
3655
+ ],
3656
+ "bleu": 0.2635416107541368,
3657
+ "commonvoice_hours": 18.0,
3658
+ "commonvoice_locale": "mk",
3659
+ "population": {
3660
+ "AL": 14451,
3661
+ "GR": 169714,
3662
+ "MK": 1424400
3663
+ }
3664
+ },
3665
+ {
3666
+ "language_name": "Pangasinan",
3667
+ "bcp_47": "pag",
3668
+ "speakers": 1528534,
3669
+ "scores": [
3670
+ {
3671
+ "model": "openai/gpt-4o-mini",
3672
+ "bleu": 0.14637588345836686
3673
+ },
3674
+ {
3675
+ "model": "meta-llama/llama-3.3-70b-instruct",
3676
+ "bleu": 0.17061619096272593
3677
+ },
3678
+ {
3679
+ "model": "mistralai/mistral-small-24b-instruct-2501",
3680
+ "bleu": 0.11313843155080379
3681
+ },
3682
+ {
3683
+ "model": "google/gemini-2.0-flash-001",
3684
+ "bleu": 0.27369890360254523
3685
+ },
3686
+ {
3687
+ "model": "deepseek/deepseek-chat",
3688
+ "bleu": 0.19116528491340065
3689
+ },
3690
+ {
3691
+ "model": "microsoft/phi-4",
3692
+ "bleu": 0.11054650956119119
3693
+ }
3694
+ ],
3695
+ "bleu": 0.16759020067483896,
3696
+ "commonvoice_hours": null,
3697
+ "commonvoice_locale": null,
3698
+ "population": {
3699
+ "PH": 1528534
3700
+ }
3701
+ },
3702
+ {
3703
+ "language_name": "Manipuri",
3704
+ "bcp_47": "mni",
3705
+ "speakers": 1476591,
3706
+ "scores": [
3707
+ {
3708
+ "model": "meta-llama/llama-3.3-70b-instruct",
3709
+ "bleu": 0.061702153982680315
3710
+ }
3711
+ ],
3712
+ "bleu": 0.061702153982680315,
3713
+ "commonvoice_hours": 0.0,
3714
+ "commonvoice_locale": "mni",
3715
+ "population": {
3716
+ "BD": 17892,
3717
+ "IN": 1458699
3718
+ }
3719
+ },
3720
+ {
3721
+ "language_name": "Venetian",
3722
+ "bcp_47": "vec",
3723
+ "speakers": 1380829,
3724
+ "scores": [
3725
+ {
3726
+ "model": "meta-llama/llama-3.3-70b-instruct",
3727
+ "bleu": 0.24764447442173138
3728
+ }
3729
+ ],
3730
+ "bleu": 0.24764447442173138,
3731
+ "commonvoice_hours": 0.0,
3732
+ "commonvoice_locale": "vec",
3733
+ "population": {
3734
+ "BR": 508118,
3735
+ "HR": 29594,
3736
+ "IT": 811235,
3737
+ "MX": 2444,
3738
+ "SI": 29438
3739
+ }
3740
+ },
3741
+ {
3742
+ "language_name": "Norwegian Nynorsk",
3743
+ "bcp_47": "nn",
3744
+ "speakers": 1366860,
3745
+ "scores": [
3746
+ {
3747
+ "model": "meta-llama/llama-3.3-70b-instruct",
3748
+ "bleu": 0.31661912673403325
3749
+ }
3750
+ ],
3751
+ "bleu": 0.31661912673403325,
3752
+ "commonvoice_hours": 1.5,
3753
+ "commonvoice_locale": "nn-NO",
3754
+ "population": {
3755
+ "NO": 1366860
3756
+ }
3757
+ },
3758
+ {
3759
+ "language_name": "Irish",
3760
+ "bcp_47": "ga",
3761
+ "speakers": 1237487,
3762
+ "scores": [
3763
+ {
3764
+ "model": "meta-llama/llama-3.3-70b-instruct",
3765
+ "bleu": 0.2768559181644857
3766
+ }
3767
+ ],
3768
+ "bleu": 0.2768559181644857,
3769
+ "commonvoice_hours": 5.8,
3770
+ "commonvoice_locale": "ga-IE",
3771
+ "population": {
3772
+ "GB": 98642,
3773
+ "IE": 1138845
3774
+ }
3775
+ },
3776
+ {
3777
+ "language_name": "Latvian",
3778
+ "bcp_47": "lv",
3779
+ "speakers": 1147550,
3780
+ "scores": [
3781
+ {
3782
+ "model": "meta-llama/llama-3.3-70b-instruct",
3783
+ "bleu": 0.23200427142275887
3784
+ }
3785
+ ],
3786
+ "bleu": 0.23200427142275887,
3787
+ "commonvoice_hours": 260.0,
3788
+ "commonvoice_locale": "lv",
3789
+ "population": {
3790
+ "LV": 1147550
3791
+ }
3792
+ },
3793
+ {
3794
+ "language_name": "Basque",
3795
+ "bcp_47": "eu",
3796
+ "speakers": 1088519,
3797
+ "scores": [
3798
+ {
3799
+ "model": "meta-llama/llama-3.3-70b-instruct",
3800
+ "bleu": 0.1907277513380933
3801
+ }
3802
+ ],
3803
+ "bleu": 0.1907277513380933,
3804
+ "commonvoice_hours": 335.0,
3805
+ "commonvoice_locale": "eu",
3806
+ "population": {
3807
+ "ES": 1000316,
3808
+ "FR": 88203
3809
+ }
3810
+ },
3811
+ {
3812
+ "language_name": "Sardinian",
3813
+ "bcp_47": "sc",
3814
+ "speakers": 1060846,
3815
+ "scores": [
3816
+ {
3817
+ "model": "meta-llama/llama-3.3-70b-instruct",
3818
+ "bleu": 0.2003666163856343
3819
+ }
3820
+ ],
3821
+ "bleu": 0.2003666163856343,
3822
+ "commonvoice_hours": 2.9,
3823
+ "commonvoice_locale": "sc",
3824
+ "population": {
3825
+ "IT": 1060846
3826
+ }
3827
+ },
3828
+ {
3829
+ "language_name": "Najdi Arabic",
3830
+ "bcp_47": "ars",
3831
+ "speakers": 1025205,
3832
+ "scores": [
3833
+ {
3834
+ "model": "openai/gpt-4o-mini",
3835
+ "bleu": 0.26325866988203733
3836
+ },
3837
+ {
3838
+ "model": "meta-llama/llama-3.3-70b-instruct",
3839
+ "bleu": 0.25411630061861235
3840
+ },
3841
+ {
3842
+ "model": "mistralai/mistral-small-24b-instruct-2501",
3843
+ "bleu": 0.19634428413472024
3844
+ },
3845
+ {
3846
+ "model": "google/gemini-2.0-flash-001",
3847
+ "bleu": 0.3267312117229826
3848
+ },
3849
+ {
3850
+ "model": "deepseek/deepseek-chat",
3851
+ "bleu": 0.27947088689796734
3852
+ },
3853
+ {
3854
+ "model": "microsoft/phi-4",
3855
+ "bleu": 0.23043700347741075
3856
+ }
3857
+ ],
3858
+ "bleu": 0.2583930594556218,
3859
+ "commonvoice_hours": null,
3860
+ "commonvoice_locale": null,
3861
+ "population": {
3862
+ "SA": 1025205
3863
+ }
3864
+ },
3865
+ {
3866
+ "language_name": "Yiddish",
3867
+ "bcp_47": "yi",
3868
+ "speakers": 997214,
3869
+ "scores": [
3870
+ {
3871
+ "model": "meta-llama/llama-3.3-70b-instruct",
3872
+ "bleu": 0.2748989006328114
3873
+ }
3874
+ ],
3875
+ "bleu": 0.2748989006328114,
3876
+ "commonvoice_hours": 0.5,
3877
+ "commonvoice_locale": "yi",
3878
+ "population": {
3879
+ "IL": 260264,
3880
+ "SE": 2959,
3881
+ "UA": 570998,
3882
+ "US": 162993
3883
+ }
3884
+ },
3885
+ {
3886
+ "language_name": "Kachin",
3887
+ "bcp_47": "kac",
3888
+ "speakers": 962032,
3889
+ "scores": [
3890
+ {
3891
+ "model": "meta-llama/llama-3.3-70b-instruct",
3892
+ "bleu": 0.06343642810657522
3893
+ }
3894
+ ],
3895
+ "bleu": 0.06343642810657522,
3896
+ "commonvoice_hours": null,
3897
+ "commonvoice_locale": null,
3898
+ "population": {
3899
+ "MM": 962032
3900
+ }
3901
+ },
3902
+ {
3903
+ "language_name": "Limburgish",
3904
+ "bcp_47": "li",
3905
+ "speakers": 950422,
3906
+ "scores": [
3907
+ {
3908
+ "model": "meta-llama/llama-3.3-70b-instruct",
3909
+ "bleu": 0.28017358847160223
3910
+ }
3911
+ ],
3912
+ "bleu": 0.28017358847160223,
3913
+ "commonvoice_hours": null,
3914
+ "commonvoice_locale": null,
3915
+ "population": {
3916
+ "NL": 950422
3917
+ }
3918
+ },
3919
+ {
3920
+ "language_name": "Welsh",
3921
+ "bcp_47": "cy",
3922
+ "speakers": 884910,
3923
+ "scores": [
3924
+ {
3925
+ "model": "meta-llama/llama-3.3-70b-instruct",
3926
+ "bleu": 0.31667961925197524
3927
+ }
3928
+ ],
3929
+ "bleu": 0.31667961925197524,
3930
+ "commonvoice_hours": 124.0,
3931
+ "commonvoice_locale": "cy",
3932
+ "population": {
3933
+ "AR": 30016,
3934
+ "GB": 854894
3935
+ }
3936
+ },
3937
+ {
3938
+ "language_name": "Estonian",
3939
+ "bcp_47": "et",
3940
+ "speakers": 878449,
3941
+ "scores": [
3942
+ {
3943
+ "model": "meta-llama/llama-3.3-70b-instruct",
3944
+ "bleu": 0.23762161272302187
3945
+ }
3946
+ ],
3947
+ "bleu": 0.23762161272302187,
3948
+ "commonvoice_hours": 58.0,
3949
+ "commonvoice_locale": "et",
3950
+ "population": {
3951
+ "EE": 872320,
3952
+ "FI": 6129
3953
+ }
3954
+ },
3955
+ {
3956
+ "language_name": "Asturian",
3957
+ "bcp_47": "ast",
3958
+ "speakers": 650205,
3959
+ "scores": [
3960
+ {
3961
+ "model": "meta-llama/llama-3.3-70b-instruct",
3962
+ "bleu": 0.3066166431048003
3963
+ }
3964
+ ],
3965
+ "bleu": 0.3066166431048003,
3966
+ "commonvoice_hours": 0.7,
3967
+ "commonvoice_locale": "ast",
3968
+ "population": {
3969
+ "ES": 650205
3970
+ }
3971
+ },
3972
+ {
3973
+ "language_name": "N’Ko",
3974
+ "bcp_47": "nqo",
3975
+ "speakers": 626370,
3976
+ "scores": [
3977
+ {
3978
+ "model": "openai/gpt-4o-mini",
3979
+ "bleu": 0.0026829540009563496
3980
+ },
3981
+ {
3982
+ "model": "meta-llama/llama-3.3-70b-instruct",
3983
+ "bleu": 0.0028810767141941676
3984
+ },
3985
+ {
3986
+ "model": "mistralai/mistral-small-24b-instruct-2501",
3987
+ "bleu": 0.002244809403558117
3988
+ },
3989
+ {
3990
+ "model": "google/gemini-2.0-flash-001",
3991
+ "bleu": 0.1373860974763018
3992
+ },
3993
+ {
3994
+ "model": "deepseek/deepseek-chat",
3995
+ "bleu": 0.005449384832055512
3996
+ },
3997
+ {
3998
+ "model": "microsoft/phi-4",
3999
+ "bleu": 0.001220306675003964
4000
+ }
4001
+ ],
4002
+ "bleu": 0.02531077151701165,
4003
+ "commonvoice_hours": 0.0,
4004
+ "commonvoice_locale": "nqo",
4005
+ "population": {
4006
+ "GN": 626370
4007
+ }
4008
+ },
4009
+ {
4010
+ "language_name": "Nuer",
4011
+ "bcp_47": "nus",
4012
+ "speakers": 591427,
4013
+ "scores": [
4014
+ {
4015
+ "model": "meta-llama/llama-3.3-70b-instruct",
4016
+ "bleu": 0.04422064781985695
4017
+ }
4018
+ ],
4019
+ "bleu": 0.04422064781985695,
4020
+ "commonvoice_hours": null,
4021
+ "commonvoice_locale": null,
4022
+ "population": {
4023
+ "SS": 591427
4024
+ }
4025
+ },
4026
+ {
4027
+ "language_name": "Ligurian",
4028
+ "bcp_47": "lij",
4029
+ "speakers": 536663,
4030
+ "scores": [
4031
+ {
4032
+ "model": "openai/gpt-4o-mini",
4033
+ "bleu": 0.2433180508520944
4034
+ },
4035
+ {
4036
+ "model": "meta-llama/llama-3.3-70b-instruct",
4037
+ "bleu": 0.2730358021257564
4038
+ },
4039
+ {
4040
+ "model": "mistralai/mistral-small-24b-instruct-2501",
4041
+ "bleu": 0.1659569541464764
4042
+ },
4043
+ {
4044
+ "model": "google/gemini-2.0-flash-001",
4045
+ "bleu": 0.3868854055493315
4046
+ },
4047
+ {
4048
+ "model": "deepseek/deepseek-chat",
4049
+ "bleu": 0.30131335750773747
4050
+ },
4051
+ {
4052
+ "model": "microsoft/phi-4",
4053
+ "bleu": 0.22953082347299453
4054
+ }
4055
+ ],
4056
+ "bleu": 0.26667339894239844,
4057
+ "commonvoice_hours": 5.1,
4058
+ "commonvoice_locale": "lij",
4059
+ "population": {
4060
+ "IT": 536663
4061
+ }
4062
+ },
4063
+ {
4064
+ "language_name": "Kabuverdianu",
4065
+ "bcp_47": "kea",
4066
+ "speakers": 530762,
4067
+ "scores": [
4068
+ {
4069
+ "model": "meta-llama/llama-3.3-70b-instruct",
4070
+ "bleu": 0.2049568393036302
4071
+ }
4072
+ ],
4073
+ "bleu": 0.2049568393036302,
4074
+ "commonvoice_hours": null,
4075
+ "commonvoice_locale": null,
4076
+ "population": {
4077
+ "CV": 530762
4078
+ }
4079
+ },
4080
+ {
4081
+ "language_name": "Mari",
4082
+ "bcp_47": "chm",
4083
+ "speakers": 524371,
4084
+ "scores": [
4085
+ {
4086
+ "model": "meta-llama/llama-3.3-70b-instruct",
4087
+ "bleu": 0.1477332953533076
4088
+ }
4089
+ ],
4090
+ "bleu": 0.1477332953533076,
4091
+ "commonvoice_hours": 282.0,
4092
+ "commonvoice_locale": "mhr",
4093
+ "population": {
4094
+ "RU": 524371
4095
+ }
4096
+ },
4097
+ {
4098
+ "language_name": "Sicilian",
4099
+ "bcp_47": "scn",
4100
+ "speakers": 511702,
4101
+ "scores": [
4102
+ {
4103
+ "model": "meta-llama/llama-3.3-70b-instruct",
4104
+ "bleu": 0.26991391704955275
4105
+ }
4106
+ ],
4107
+ "bleu": 0.26991391704955275,
4108
+ "commonvoice_hours": 0.0,
4109
+ "commonvoice_locale": "scn",
4110
+ "population": {
4111
+ "IT": 511702
4112
+ }
4113
+ },
4114
+ {
4115
+ "language_name": "Silesian",
4116
+ "bcp_47": "szl",
4117
+ "speakers": 497670,
4118
+ "scores": [
4119
+ {
4120
+ "model": "meta-llama/llama-3.3-70b-instruct",
4121
+ "bleu": 0.2478859256009672
4122
+ }
4123
+ ],
4124
+ "bleu": 0.2478859256009672,
4125
+ "commonvoice_hours": null,
4126
+ "commonvoice_locale": null,
4127
+ "population": {
4128
+ "PL": 497670
4129
+ }
4130
+ },
4131
+ {
4132
+ "language_name": "Maltese",
4133
+ "bcp_47": "mt",
4134
+ "speakers": 457267,
4135
+ "scores": [
4136
+ {
4137
+ "model": "openai/gpt-4o-mini",
4138
+ "bleu": 0.27335683193570975
4139
+ },
4140
+ {
4141
+ "model": "meta-llama/llama-3.3-70b-instruct",
4142
+ "bleu": 0.28654849898846085
4143
+ },
4144
+ {
4145
+ "model": "mistralai/mistral-small-24b-instruct-2501",
4146
+ "bleu": 0.15248109554681186
4147
+ },
4148
+ {
4149
+ "model": "google/gemini-2.0-flash-001",
4150
+ "bleu": 0.38954095833662916
4151
+ },
4152
+ {
4153
+ "model": "deepseek/deepseek-chat",
4154
+ "bleu": 0.3175303995061197
4155
+ },
4156
+ {
4157
+ "model": "microsoft/phi-4",
4158
+ "bleu": 0.11179045198515461
4159
+ }
4160
+ ],
4161
+ "bleu": 0.2552080393831477,
4162
+ "commonvoice_hours": 8.7,
4163
+ "commonvoice_locale": "mt",
4164
+ "population": {
4165
+ "MT": 457267
4166
+ }
4167
+ },
4168
+ {
4169
+ "language_name": "Luxembourgish",
4170
+ "bcp_47": "lb",
4171
+ "speakers": 421015,
4172
+ "scores": [
4173
+ {
4174
+ "model": "meta-llama/llama-3.3-70b-instruct",
4175
+ "bleu": 0.2835765541228824
4176
+ }
4177
+ ],
4178
+ "bleu": 0.2835765541228824,
4179
+ "commonvoice_hours": 0.0,
4180
+ "commonvoice_locale": "lb",
4181
+ "population": {
4182
+ "LU": 421015
4183
+ }
4184
+ },
4185
+ {
4186
+ "language_name": "Dzongkha",
4187
+ "bcp_47": "dz",
4188
+ "speakers": 370341,
4189
+ "scores": [
4190
+ {
4191
+ "model": "meta-llama/llama-3.3-70b-instruct",
4192
+ "bleu": 0.05723633975042216
4193
+ }
4194
+ ],
4195
+ "bleu": 0.05723633975042216,
4196
+ "commonvoice_hours": null,
4197
+ "commonvoice_locale": null,
4198
+ "population": {
4199
+ "BT": 367689,
4200
+ "IN": 2652
4201
+ }
4202
+ },
4203
+ {
4204
+ "language_name": "Fijian",
4205
+ "bcp_47": "fj",
4206
+ "speakers": 365030,
4207
+ "scores": [
4208
+ {
4209
+ "model": "meta-llama/llama-3.3-70b-instruct",
4210
+ "bleu": 0.10451524271139898
4211
+ }
4212
+ ],
4213
+ "bleu": 0.10451524271139898,
4214
+ "commonvoice_hours": null,
4215
+ "commonvoice_locale": null,
4216
+ "population": {
4217
+ "FJ": 365030
4218
+ }
4219
+ },
4220
+ {
4221
+ "language_name": "Icelandic",
4222
+ "bcp_47": "is",
4223
+ "speakers": 350734,
4224
+ "scores": [
4225
+ {
4226
+ "model": "meta-llama/llama-3.3-70b-instruct",
4227
+ "bleu": 0.2142644347281729
4228
+ }
4229
+ ],
4230
+ "bleu": 0.2142644347281729,
4231
+ "commonvoice_hours": 0.1,
4232
+ "commonvoice_locale": "is",
4233
+ "population": {
4234
+ "IS": 350734
4235
+ }
4236
+ },
4237
+ {
4238
+ "language_name": "Samoan",
4239
+ "bcp_47": "sm",
4240
+ "speakers": 252717,
4241
+ "scores": [
4242
+ {
4243
+ "model": "openai/gpt-4o-mini",
4244
+ "bleu": 0.13327372905795537
4245
+ },
4246
+ {
4247
+ "model": "meta-llama/llama-3.3-70b-instruct",
4248
+ "bleu": 0.11602460228698847
4249
+ },
4250
+ {
4251
+ "model": "mistralai/mistral-small-24b-instruct-2501",
4252
+ "bleu": 0.05771585788755527
4253
+ },
4254
+ {
4255
+ "model": "google/gemini-2.0-flash-001",
4256
+ "bleu": 0.2585259997356889
4257
+ },
4258
+ {
4259
+ "model": "deepseek/deepseek-chat",
4260
+ "bleu": 0.22221137013078898
4261
+ },
4262
+ {
4263
+ "model": "microsoft/phi-4",
4264
+ "bleu": 0.04756300118196289
4265
+ }
4266
+ ],
4267
+ "bleu": 0.13921909338015664,
4268
+ "commonvoice_hours": null,
4269
+ "commonvoice_locale": null,
4270
+ "population": {
4271
+ "AS": 48943,
4272
+ "WS": 203774
4273
+ }
4274
+ },
4275
+ {
4276
+ "language_name": "Crimean Tatar",
4277
+ "bcp_47": "crh",
4278
+ "speakers": 245968,
4279
+ "scores": [
4280
+ {
4281
+ "model": "meta-llama/llama-3.3-70b-instruct",
4282
+ "bleu": 0.22725256040555009
4283
+ }
4284
+ ],
4285
+ "bleu": 0.22725256040555009,
4286
+ "commonvoice_hours": 0.0,
4287
+ "commonvoice_locale": "crh",
4288
+ "population": {
4289
+ "UA": 245968
4290
+ }
4291
+ },
4292
+ {
4293
+ "language_name": "Papiamento",
4294
+ "bcp_47": "pap",
4295
+ "speakers": 211640,
4296
+ "scores": [
4297
+ {
4298
+ "model": "meta-llama/llama-3.3-70b-instruct",
4299
+ "bleu": 0.21648792499796674
4300
+ }
4301
+ ],
4302
+ "bleu": 0.21648792499796674,
4303
+ "commonvoice_hours": 0.0,
4304
+ "commonvoice_locale": "pap-AW",
4305
+ "population": {
4306
+ "AW": 72851,
4307
+ "BQ": 16200,
4308
+ "CW": 122589
4309
+ }
4310
+ },
4311
+ {
4312
+ "language_name": "Latgalian",
4313
+ "bcp_47": "ltg",
4314
+ "speakers": 167429,
4315
+ "scores": [
4316
+ {
4317
+ "model": "meta-llama/llama-3.3-70b-instruct",
4318
+ "bleu": 0.13475273241606922
4319
+ }
4320
+ ],
4321
+ "bleu": 0.13475273241606922,
4322
+ "commonvoice_hours": 28.0,
4323
+ "commonvoice_locale": "ltg",
4324
+ "population": {
4325
+ "LV": 167429
4326
+ }
4327
+ },
4328
+ {
4329
+ "language_name": "Māori",
4330
+ "bcp_47": "mi",
4331
+ "speakers": 137913,
4332
+ "scores": [
4333
+ {
4334
+ "model": "meta-llama/llama-3.3-70b-instruct",
4335
+ "bleu": 0.17610729049259877
4336
+ }
4337
+ ],
4338
+ "bleu": 0.17610729049259877,
4339
+ "commonvoice_hours": null,
4340
+ "commonvoice_locale": null,
4341
+ "population": {
4342
+ "NZ": 137913
4343
+ }
4344
+ },
4345
+ {
4346
+ "language_name": "Scottish Gaelic",
4347
+ "bcp_47": "gd",
4348
+ "speakers": 72337,
4349
+ "scores": [
4350
+ {
4351
+ "model": "meta-llama/llama-3.3-70b-instruct",
4352
+ "bleu": 0.2154279041570466
4353
+ }
4354
+ ],
4355
+ "bleu": 0.2154279041570466,
4356
+ "commonvoice_hours": null,
4357
+ "commonvoice_locale": null,
4358
+ "population": {
4359
+ "GB": 72337
4360
+ }
4361
+ },
4362
+ {
4363
+ "language_name": "Faroese",
4364
+ "bcp_47": "fo",
4365
+ "speakers": 71351,
4366
+ "scores": [
4367
+ {
4368
+ "model": "meta-llama/llama-3.3-70b-instruct",
4369
+ "bleu": 0.21180629663838063
4370
+ }
4371
+ ],
4372
+ "bleu": 0.21180629663838063,
4373
+ "commonvoice_hours": 0.0,
4374
+ "commonvoice_locale": "fo",
4375
+ "population": {
4376
+ "DK": 22304,
4377
+ "FO": 49047
4378
+ }
4379
+ },
4380
+ {
4381
+ "language_name": "Friulian",
4382
+ "bcp_47": "fur",
4383
+ "speakers": 37442,
4384
+ "scores": [
4385
+ {
4386
+ "model": "meta-llama/llama-3.3-70b-instruct",
4387
+ "bleu": 0.2255876860328074
4388
+ }
4389
+ ],
4390
+ "bleu": 0.2255876860328074,
4391
+ "commonvoice_hours": null,
4392
+ "commonvoice_locale": null,
4393
+ "population": {
4394
+ "IT": 37442
4395
+ }
4396
+ },
4397
+ {
4398
+ "language_name": "Sanskrit",
4399
+ "bcp_47": "sa",
4400
+ "speakers": 15913,
4401
+ "scores": [
4402
+ {
4403
+ "model": "meta-llama/llama-3.3-70b-instruct",
4404
+ "bleu": 0.14313294345831834
4405
+ }
4406
+ ],
4407
+ "bleu": 0.14313294345831834,
4408
+ "commonvoice_hours": null,
4409
+ "commonvoice_locale": null,
4410
+ "population": {
4411
+ "IN": 15913
4412
+ }
4413
+ },
4414
+ {
4415
+ "language_name": "Esperanto",
4416
+ "bcp_47": "eo",
4417
+ "speakers": 301,
4418
+ "scores": [
4419
+ {
4420
+ "model": "meta-llama/llama-3.3-70b-instruct",
4421
+ "bleu": 0.27440987441620224
4422
+ }
4423
+ ],
4424
+ "bleu": 0.27440987441620224,
4425
+ "commonvoice_hours": 1436.0,
4426
+ "commonvoice_locale": "eo",
4427
+ "population": {
4428
+ "SM": 301
4429
+ }
4430
  }
4431
  ]
uv.lock CHANGED
@@ -955,7 +955,7 @@ requires-dist = [
955
  { name = "gradio", specifier = ">=5.16.2" },
956
  { name = "pandas", specifier = ">=2.2.3" },
957
  { name = "plotly", specifier = ">=6.0.0" },
958
- { name = "pycountry" },
959
  ]
960
 
961
  [package.metadata.requires-dev]
 
955
  { name = "gradio", specifier = ">=5.16.2" },
956
  { name = "pandas", specifier = ">=2.2.3" },
957
  { name = "plotly", specifier = ">=6.0.0" },
958
+ { name = "pycountry", specifier = ">=24.6.1" },
959
  ]
960
 
961
  [package.metadata.requires-dev]