kargaranamir committed on
Commit
1ed2958
·
1 Parent(s): 1a5b35c

add azb, bho, kan, kin, lug, luo, nya, srd and edit brh

Browse files
languages/azb_Arab.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "South-Azerbaijani",
3
+ "Family": "Turkic",
4
+ "Subgrouping": "Oghuz",
5
+ "Number of Speakers": "14_000_000",
6
+ "Supported by allenai/MADLAD-400": 0,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 1,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "trt.net.tr",
12
+ "Site URL": "https://www.trt.net.tr/turki/",
13
+ "Category": "news",
14
+ "Confidence": "🟩",
15
+ "Info": "native speaker confirmation",
16
+ "Possible Parallel Languages": "many",
17
+ "Links": []
18
+ },
19
+ {
20
+ "Site Name": "azb.wikipedia.org",
21
+ "Site URL": "https://azb.wikipedia.org/wiki/",
22
+ "Category": "articles",
23
+ "Confidence": "🟩",
24
+ "Info": "confirmed by webpage metadata",
25
+ "Possible Parallel Languages": "many",
26
+ "Links": []
27
+ }
28
+ ]
29
+ }
languages/bho_Deva.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Bhojpuri",
3
+ "Family": "Indo-European",
4
+ "Subgrouping": "Eastern Indo-Aryan",
5
+ "Number of Speakers": "51_000_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 0,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "anjoria.com",
12
+ "Site URL": "https://www.anjoria.com/news_in_bhojpuri",
13
+ "Category": "news",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ },
19
+ {
20
+ "Site Name": "bhojpuri.blogspot.com",
21
+ "Site URL": "https://bhojpuri.blogspot.com",
22
+ "Category": "blog",
23
+ "Confidence": "🟩",
24
+ "Info": "confirmed by webpage metadata",
25
+ "Possible Parallel Languages": "",
26
+ "Links": []
27
+ },
28
+ {
29
+ "Site Name": "jogira.com",
30
+ "Site URL": "https://jogira.com/",
31
+ "Category": "news",
32
+ "Confidence": "🟩",
33
+ "Info": "confirmed by webpage metadata",
34
+ "Possible Parallel Languages": "",
35
+ "Links": []
36
+ }
37
+ ]
38
+ }
languages/brh_Arab.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "Language Name": "Brahui",
3
  "Family": "Dravidian",
4
- "Subgrouping": "",
5
  "Number of Speakers": "2_800_000",
6
  "Supported by allenai/MADLAD-400": 0,
7
  "Supported by facebook/flores": 0,
 
1
  {
2
  "Language Name": "Brahui",
3
  "Family": "Dravidian",
4
+ "Subgrouping": "Northern Dravidian",
5
  "Number of Speakers": "2_800_000",
6
  "Supported by allenai/MADLAD-400": 0,
7
  "Supported by facebook/flores": 0,
languages/kan_Knda.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Kannada",
3
+ "Family": "Dravidian",
4
+ "Subgrouping": "Southern Dravidian",
5
+ "Number of Speakers": "44_000_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 1,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "vijaykarnataka.com",
12
+ "Site URL": "https://vijaykarnataka.com",
13
+ "Category": "news",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ }
19
+ ]
20
+ }
languages/kin_Latn.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Kinyarwanda",
3
+ "Family": "Niger-Congo",
4
+ "Subgrouping": "Bantu",
5
+ "Number of Speakers": "15_000_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 1,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "kigalitoday.com",
12
+ "Site URL": "https://www.kigalitoday.com/",
13
+ "Category": "news",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ },
19
+ {
20
+ "Site Name": "igihe.com",
21
+ "Site URL": "https://igihe.com/index.php",
22
+ "Category": "news",
23
+ "Confidence": "🟩",
24
+ "Info": "confirmed by webpage metadata and glotlid",
25
+ "Possible Parallel Languages": "eng_Latn, fra_Latn",
26
+ "Links": []
27
+ }
28
+
29
+ ]
30
+ }
languages/lug_Latn.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Ganda",
3
+ "Family": "Niger-Congo",
4
+ "Subgrouping": "Bantu",
5
+ "Number of Speakers": "5_600_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 1,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "bukedde.co.ug",
12
+ "Site URL": "https://www.bukedde.co.ug/category/amawulire",
13
+ "Category": "news",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ },
19
+ {
20
+ "Site Name": "gambuuze.ug",
21
+ "Site URL": "https://gambuuze.ug/",
22
+ "Category": "news",
23
+ "Confidence": "🟩",
24
+ "Info": "confirmed by webpage metadata and glotlid",
25
+ "Possible Parallel Languages": "",
26
+ "Links": []
27
+ }
28
+ ]
29
+ }
languages/luo_Latn.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Luo (Kenya and Tanzania)",
3
+ "Family": "Nilo-Saharan",
4
+ "Subgrouping": "Nilotic",
5
+ "Number of Speakers": "4_200_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 1,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "rmsradio.co.ke",
12
+ "Site URL": "https://rmsradio.co.ke/brands/ramogi-fm/",
13
+ "Category": "radio station, news",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ }
19
+ ]
20
+ }
languages/nya_Latn.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Nyanja",
3
+ "Family": "Niger-Congo",
4
+ "Subgrouping": "Bantu",
5
+ "Number of Speakers": "7_000_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 1,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "mwnation.com",
12
+ "Site URL": "https://mwnation.com/section/chichewa/",
13
+ "Category": "news",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ },
19
+ {
20
+ "Site Name": "radiomaria.mw",
21
+ "Site URL": "https://www.radiomaria.mw/",
22
+ "Category": "radio station, news",
23
+ "Confidence": "🟩",
24
+ "Info": "confirmed by webpage metadata and glotlid",
25
+ "Possible Parallel Languages": "",
26
+ "Links": []
27
+ }
28
+ ]
29
+ }
languages/srd_Latn.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Sardinian",
3
+ "Family": "Indo-European",
4
+ "Subgrouping": "Italic",
5
+ "Number of Speakers": "1_000_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 1,
8
+ "Supported by cis-lmu/Glot500": 1,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "salimbasarda.net",
12
+ "Site URL": "https://salimbasarda.net/",
13
+ "Category": "culture",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ },
19
+ {
20
+ "Site Name": "ilminuto.info",
21
+ "Site URL": "https://www.ilminuto.info/sc/",
22
+ "Category": "news",
23
+ "Confidence": "🟩",
24
+ "Info": "confirmed by webpage metadata and glotlid",
25
+ "Possible Parallel Languages": "ita_Latn",
26
+ "Links": []
27
+ },
28
+ {
29
+ "Site Name": "sc.wikipedia.org",
30
+ "Site URL": "https://sc.wikipedia.org/wiki/",
31
+ "Category": "articles",
32
+ "Confidence": "🟩",
33
+ "Info": "confirmed by webpage metadata",
34
+ "Possible Parallel Languages": "many",
35
+ "Links": []
36
+ }
37
+ ]
38
+ }