SalomonMetre13 commited on
Commit
61a0e9f
·
verified ·
1 Parent(s): 419b5dd

Training in progress, step 1000

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
adapter_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "facebook/nllb-200-distilled-600M",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "lm_head"
23
+ ],
24
+ "peft_type": "LORA",
25
+ "r": 16,
26
+ "rank_pattern": {},
27
+ "revision": null,
28
+ "target_modules": [
29
+ "v_proj",
30
+ "out_proj",
31
+ "q_proj",
32
+ "k_proj"
33
+ ],
34
+ "task_type": "SEQ_2_SEQ_LM",
35
+ "trainable_token_indices": null,
36
+ "use_dora": false,
37
+ "use_rslora": false
38
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4617fde5bdb9e1935b0627fef483934a0cd6546ec7fd3e35eafe6d17d3b2754f
3
+ size 4231063584
added_tokens.json ADDED
@@ -0,0 +1,887 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Abafilistini": 256259,
3
+ "Abanya": 256710,
4
+ "Abayahudi": 256283,
5
+ "Abrahamu": 256312,
6
+ "Absa": 257033,
7
+ "Akabà": 257022,
8
+ "Alaga": 256857,
9
+ "Amango": 256588,
10
+ "Amoni": 256729,
11
+ "Anaci": 256842,
12
+ "Aroni": 256803,
13
+ "Babi": 257075,
14
+ "Babiloni": 256361,
15
+ "Banaci": 256705,
16
+ "Benya": 256835,
17
+ "Benyamini": 256291,
18
+ "Bulya": 256914,
19
+ "Bulyâla": 256452,
20
+ "Buyahudi": 256331,
21
+ "Cependant": 256242,
22
+ "Cikwône": 256390,
23
+ "Daudi": 256883,
24
+ "Efrayimu": 256315,
25
+ "Emirhwe": 256430,
26
+ "Enyuma": 256549,
27
+ "Faraoni": 256442,
28
+ "Galadi": 256558,
29
+ "Israheli": 256318,
30
+ "Israélites": 256220,
31
+ "Izaki": 256977,
32
+ "Jérusalem": 256237,
33
+ "Kandi": 256751,
34
+ "Kanâni": 256599,
35
+ "Kurhenga": 256363,
36
+ "Kurhi": 256989,
37
+ "Larha": 256991,
38
+ "Lêro": 257038,
39
+ "Maintenant": 256211,
40
+ "Malahika": 256349,
41
+ "Menashè": 256481,
42
+ "Mowabu": 256680,
43
+ "Murha": 256944,
44
+ "Mutagatîfu": 256269,
45
+ "Mwene": 256893,
46
+ "Mwâmi": 256950,
47
+ "Mâshi": 257009,
48
+ "Mîsiri": 256539,
49
+ "Ngasi": 256779,
50
+ "Nnâmahanga": 256266,
51
+ "Nyakasane": 256303,
52
+ "Nyamu": 256985,
53
+ "Nyamubâho": 256299,
54
+ "Nyamuzinda": 256268,
55
+ "Obwo": 257052,
56
+ "Okubundi": 256364,
57
+ "Okuhandi": 256335,
58
+ "Olubaga": 256386,
59
+ "Omuntu": 256515,
60
+ "Paolo": 256807,
61
+ "Petro": 256785,
62
+ "Philistins": 256214,
63
+ "Pourtant": 256247,
64
+ "Sabato": 256608,
65
+ "Salomoni": 256355,
66
+ "Samweli": 256463,
67
+ "Samâriya": 256306,
68
+ "Saulu": 256817,
69
+ "Simoni": 256660,
70
+ "Siyoni": 256702,
71
+ "Sîriya": 256678,
72
+ "Yakô": 257065,
73
+ "Yakôbo": 256584,
74
+ "Yeruzale": 256360,
75
+ "Yeruzalemu": 256265,
76
+ "Yordani": 256418,
77
+ "Yowa": 257061,
78
+ "Yowabu": 256551,
79
+ "Yowane": 256516,
80
+ "Yoze": 257031,
81
+ "Yozefu": 256612,
82
+ "Yozwè": 256805,
83
+ "Yâgirwa": 256432,
84
+ "Yônatani": 256350,
85
+ "Yûda": 257048,
86
+ "__fra_Latn__": 256204,
87
+ "__shr_Latn__": 256205,
88
+ "ababusi": 256427,
89
+ "ababwîra": 256327,
90
+ "abadâhwa": 256356,
91
+ "abagala": 256440,
92
+ "abaganda": 256316,
93
+ "abagula": 256479,
94
+ "abakazi": 256494,
95
+ "abalwî": 256688,
96
+ "abalêbi": 256383,
97
+ "abarhambo": 256298,
98
+ "abashombanyi": 256261,
99
+ "abasirika": 256300,
100
+ "abazimu": 256491,
101
+ "abirhi": 256617,
102
+ "abiri": 256994,
103
+ "aburha": 256564,
104
+ "abwîra": 256583,
105
+ "abâli": 256962,
106
+ "abâmi": 256988,
107
+ "abâna": 256781,
108
+ "abôla": 256904,
109
+ "aderha": 256618,
110
+ "adesire": 256428,
111
+ "adwîrhe": 256473,
112
+ "agend": 256846,
113
+ "agwêrhe": 256439,
114
+ "agôla": 256755,
115
+ "ahantu": 256701,
116
+ "ahika": 256936,
117
+ "ahâli": 256937,
118
+ "ahôla": 256954,
119
+ "ajira": 256798,
120
+ "akabà": 256778,
121
+ "akalamo": 256369,
122
+ "akantu": 256548,
123
+ "akanwa": 256668,
124
+ "allèrent": 256253,
125
+ "amaboko": 256474,
126
+ "amabuye": 256499,
127
+ "amabî": 256952,
128
+ "amagulu": 256486,
129
+ "amalunga": 256351,
130
+ "amango": 256538,
131
+ "amarh": 256956,
132
+ "amarhale": 256353,
133
+ "amarhegeko": 256272,
134
+ "amashanja": 256287,
135
+ "amasholo": 256329,
136
+ "amasù": 256828,
137
+ "amavurha": 256340,
138
+ "aminjà": 256723,
139
+ "amubwîra": 256362,
140
+ "amîshi": 256590,
141
+ "anaba": 256979,
142
+ "anaci": 256983,
143
+ "anaciba": 256457,
144
+ "anacibabwîra": 256256,
145
+ "anacibwîra": 256271,
146
+ "anaciderha": 256264,
147
+ "anacimu": 256461,
148
+ "anacimubwîra": 256258,
149
+ "anacishuza": 256274,
150
+ "anaku": 256876,
151
+ "anamu": 256824,
152
+ "arhaci": 256595,
153
+ "arhali": 256614,
154
+ "arhana": 256594,
155
+ "arhanka": 256490,
156
+ "arhanu": 256572,
157
+ "arhenga": 256476,
158
+ "arhuma": 256677,
159
+ "arhôla": 256666,
160
+ "arrivèrent": 256210,
161
+ "asharhu": 256368,
162
+ "ashub": 256730,
163
+ "ashubi": 256649,
164
+ "ashuza": 256524,
165
+ "ayish": 256923,
166
+ "ayisha": 256615,
167
+ "ayumva": 256571,
168
+ "babirhi": 256462,
169
+ "babiri": 256521,
170
+ "babona": 256610,
171
+ "babwîra": 256426,
172
+ "baderha": 256500,
173
+ "badwîrhe": 256347,
174
+ "bagala": 256544,
175
+ "bagendi": 256400,
176
+ "baguma": 256566,
177
+ "bagwa": 256792,
178
+ "bahika": 256636,
179
+ "bajira": 256694,
180
+ "bakazi": 256604,
181
+ "bakola": 256573,
182
+ "bakulu": 256628,
183
+ "balume": 256650,
184
+ "balya": 256849,
185
+ "bambali": 256406,
186
+ "banaci": 256720,
187
+ "banacimu": 256333,
188
+ "banali": 256505,
189
+ "banji": 256756,
190
+ "banka": 256766,
191
+ "banyamuzinda": 256260,
192
+ "banyi": 256724,
193
+ "barha": 256959,
194
+ "barhambo": 256344,
195
+ "barhana": 256469,
196
+ "barhu": 256815,
197
+ "basho": 256922,
198
+ "bayish": 256518,
199
+ "bayisha": 256377,
200
+ "bayumva": 256376,
201
+ "bihugo": 256622,
202
+ "bihumbi": 256456,
203
+ "bijingo": 256482,
204
+ "binali": 256554,
205
+ "bindi": 256818,
206
+ "binji": 256986,
207
+ "binjà": 256970,
208
+ "binwa": 256743,
209
+ "binyu": 256750,
210
+ "birha": 256830,
211
+ "birugu": 256687,
212
+ "birya": 256996,
213
+ "bisha": 256932,
214
+ "bishagala": 256295,
215
+ "bombi": 256855,
216
+ "bonaga": 256627,
217
+ "bonekana": 256308,
218
+ "boshi": 256749,
219
+ "budufu": 256647,
220
+ "buhashe": 256420,
221
+ "bulya": 256899,
222
+ "bulyâla": 256425,
223
+ "bundi": 256982,
224
+ "bunôla": 256560,
225
+ "burha": 256890,
226
+ "burhambi": 256314,
227
+ "burhanzi": 256343,
228
+ "burhuma": 256501,
229
+ "burhè": 256906,
230
+ "busha": 256851,
231
+ "buzibu": 256692,
232
+ "buzinda": 256392,
233
+ "buzira": 256528,
234
+ "buzûka": 256629,
235
+ "bwenêne": 256421,
236
+ "bwinjà": 256663,
237
+ "bwinyu": 256556,
238
+ "bwâbo": 256949,
239
+ "bwâge": 256868,
240
+ "bwâmi": 256753,
241
+ "bwâni": 256733,
242
+ "bwâwe": 256816,
243
+ "bwîne": 256736,
244
+ "bwîra": 256935,
245
+ "bwîre": 256874,
246
+ "bwîzire": 256455,
247
+ "byoshi": 256541,
248
+ "byàli": 256948,
249
+ "byâbo": 256958,
250
+ "byâge": 256738,
251
+ "byâha": 256757,
252
+ "byâni": 256859,
253
+ "byâwe": 256901,
254
+ "bâge": 257050,
255
+ "bâli": 257088,
256
+ "bêra": 257085,
257
+ "bîrhu": 256725,
258
+ "bûko": 257041,
259
+ "cici": 257034,
260
+ "ciguma": 256681,
261
+ "cihugo": 256643,
262
+ "cira": 257035,
263
+ "cirha": 256881,
264
+ "cirhumire": 256284,
265
+ "ciru": 257077,
266
+ "cirya": 256860,
267
+ "cishagala": 256282,
268
+ "citabu": 256543,
269
+ "combattre": 256236,
270
+ "commandements": 256206,
271
+ "communion": 256234,
272
+ "compagnons": 256216,
273
+ "confiance": 256227,
274
+ "connaître": 256231,
275
+ "contraire": 256241,
276
+ "coshi": 256758,
277
+ "câwe": 257082,
278
+ "cîmba": 257008,
279
+ "cîsho": 256871,
280
+ "demandèrent": 256209,
281
+ "derha": 256827,
282
+ "derhaga": 256405,
283
+ "derhe": 256895,
284
+ "derho": 256802,
285
+ "derhwa": 256676,
286
+ "descendants": 256207,
287
+ "desire": 256534,
288
+ "disciples": 256243,
289
+ "dwîrhe": 256620,
290
+ "dâhwa": 256939,
291
+ "dôsa": 257066,
292
+ "ebibuzi": 256395,
293
+ "ebijiro": 256422,
294
+ "ebinwa": 256525,
295
+ "ebirugu": 256449,
296
+ "ebiryo": 256508,
297
+ "ebishagala": 256267,
298
+ "eburhambi": 256281,
299
+ "ebyâ": 257049,
300
+ "ebyâha": 256721,
301
+ "ebyôla": 256606,
302
+ "ecihugo": 256407,
303
+ "ecâha": 256726,
304
+ "ecôla": 256924,
305
+ "ekarhî": 256602,
306
+ "emalunga": 256310,
307
+ "embere": 256537,
308
+ "embuga": 256546,
309
+ "emburho": 256394,
310
+ "emikolo": 256465,
311
+ "emilala": 256419,
312
+ "emira": 256763,
313
+ "emirasano": 256297,
314
+ "emirhi": 256582,
315
+ "emisî": 256884,
316
+ "empingu": 256409,
317
+ "emunda": 256665,
318
+ "emyambalo": 256301,
319
+ "emyâka": 256633,
320
+ "endagâno": 256354,
321
+ "engabo": 256523,
322
+ "engano": 256718,
323
+ "engôrho": 256417,
324
+ "enjira": 256637,
325
+ "enshonyi": 256339,
326
+ "ensiku": 256532,
327
+ "entambala": 256288,
328
+ "enterekêro": 256273,
329
+ "entondo": 256403,
330
+ "entumwa": 256484,
331
+ "enyanja": 256373,
332
+ "enyanya": 256487,
333
+ "enyuma": 256529,
334
+ "enyumpa": 256408,
335
+ "enêne": 256819,
336
+ "erhali": 256545,
337
+ "eyôla": 256960,
338
+ "familles": 256252,
339
+ "farasi": 256634,
340
+ "filistini": 256277,
341
+ "galugalu": 256341,
342
+ "galuka": 256713,
343
+ "galuke": 256581,
344
+ "galya": 256945,
345
+ "gandabuzi": 256278,
346
+ "ganyi": 256853,
347
+ "garha": 256752,
348
+ "gatîfu": 256609,
349
+ "gendi": 256864,
350
+ "gendo": 256889,
351
+ "geramwo": 256411,
352
+ "ginyu": 256887,
353
+ "gire": 257036,
354
+ "girwa": 256947,
355
+ "gisha": 256825,
356
+ "goma": 257039,
357
+ "gomi": 257073,
358
+ "goshi": 256987,
359
+ "gulya": 256832,
360
+ "gurha": 256918,
361
+ "guza": 257070,
362
+ "gwoshi": 256631,
363
+ "gwàli": 256775,
364
+ "gwâbo": 256773,
365
+ "gwâge": 256765,
366
+ "gwâni": 256748,
367
+ "gwârha": 256504,
368
+ "gwâsirwe": 256321,
369
+ "gwâwe": 256797,
370
+ "gwêrhe": 256623,
371
+ "gwêrhi": 256664,
372
+ "gâge": 257064,
373
+ "gâwe": 257032,
374
+ "gûla": 257071,
375
+ "habitants": 256233,
376
+ "haguma": 256547,
377
+ "halya": 256894,
378
+ "hamagala": 256337,
379
+ "hamîri": 256690,
380
+ "hamîrizi": 256342,
381
+ "hango": 256848,
382
+ "hantu": 256963,
383
+ "harâmya": 256497,
384
+ "hash": 257060,
385
+ "hashe": 256769,
386
+ "hembe": 256745,
387
+ "hembo": 256968,
388
+ "hemu": 257069,
389
+ "herêre": 256674,
390
+ "himbi": 257005,
391
+ "hindu": 256834,
392
+ "hindula": 256444,
393
+ "hirakwo": 256372,
394
+ "hire": 257051,
395
+ "hirhi": 256806,
396
+ "hoshi": 256879,
397
+ "huluka": 256616,
398
+ "humbi": 256886,
399
+ "hyâhya": 256679,
400
+ "hâbwa": 256800,
401
+ "hâna": 257053,
402
+ "hêka": 257074,
403
+ "hêma": 257030,
404
+ "hêrero": 256509,
405
+ "hîra": 257029,
406
+ "hîraga": 256644,
407
+ "hôfi": 257054,
408
+ "ibirhi": 256542,
409
+ "iburha": 256658,
410
+ "ibuye": 256861,
411
+ "idaho": 256841,
412
+ "idako": 257010,
413
+ "iderha": 256651,
414
+ "idivayi": 256371,
415
+ "igulu": 256776,
416
+ "ihano": 256777,
417
+ "ikumi": 256862,
418
+ "intellig": 256246,
419
+ "irenge": 256563,
420
+ "irhanu": 256706,
421
+ "irhegeko": 256326,
422
+ "irhondo": 256415,
423
+ "irhwe": 256790,
424
+ "iriba": 256907,
425
+ "irugu": 256891,
426
+ "irungu": 256561,
427
+ "ishanja": 256475,
428
+ "isharhu": 256431,
429
+ "ishiriza": 256365,
430
+ "izire": 256934,
431
+ "izîno": 256909,
432
+ "izûba": 256796,
433
+ "jingo": 256810,
434
+ "jiraga": 256689,
435
+ "jirira": 256570,
436
+ "jirire": 256619,
437
+ "jiro": 257072,
438
+ "jizire": 256638,
439
+ "kabanda": 256496,
440
+ "kabirhi": 256489,
441
+ "kantu": 256783,
442
+ "kanwa": 256852,
443
+ "karhî": 257004,
444
+ "kasane": 256600,
445
+ "kasharhu": 256304,
446
+ "kazâg": 256811,
447
+ "kengêra": 256460,
448
+ "kengêre": 256492,
449
+ "kenyi": 256882,
450
+ "keza": 257078,
451
+ "kiyahu": 256557,
452
+ "kogo": 257063,
453
+ "kolaga": 256714,
454
+ "kolera": 256511,
455
+ "kolê": 257079,
456
+ "konene": 256562,
457
+ "koshi": 256760,
458
+ "kubî": 257045,
459
+ "kuderha": 256412,
460
+ "kuguma": 256580,
461
+ "kuhika": 256699,
462
+ "kujira": 256684,
463
+ "kukwânîne": 256293,
464
+ "kulikira": 256328,
465
+ "kulusha": 256435,
466
+ "kulya": 256878,
467
+ "kulûza": 256597,
468
+ "kurha": 256931,
469
+ "kurhenga": 256358,
470
+ "kurhi": 256965,
471
+ "kurhuma": 256459,
472
+ "kuza": 257037,
473
+ "kwinjà": 256626,
474
+ "kwâge": 257025,
475
+ "kwânîne": 256370,
476
+ "kwâwe": 256731,
477
+ "kwêru": 256976,
478
+ "kwône": 256951,
479
+ "kwônene": 256472,
480
+ "lahika": 256613,
481
+ "langash": 256414,
482
+ "langashane": 256270,
483
+ "lange": 256946,
484
+ "langâ": 256964,
485
+ "langâlire": 256290,
486
+ "lanzi": 256772,
487
+ "larha": 256789,
488
+ "lendemain": 256240,
489
+ "lenge": 256844,
490
+ "leviti": 256712,
491
+ "liguma": 256513,
492
+ "linene": 256540,
493
+ "listini": 256436,
494
+ "longtemps": 256225,
495
+ "lonza": 257027,
496
+ "lonzize": 256404,
497
+ "loshi": 256867,
498
+ "lubaga": 256704,
499
+ "lubero": 256646,
500
+ "luguma": 256700,
501
+ "luhêrero": 256324,
502
+ "lukulu": 256550,
503
+ "lulya": 257000,
504
+ "lunda": 256897,
505
+ "lunga": 256925,
506
+ "lusiku": 256601,
507
+ "lwoshi": 256703,
508
+ "lwàli": 256999,
509
+ "lwâbo": 256780,
510
+ "lwâge": 256822,
511
+ "lwâla": 256744,
512
+ "lwâni": 256786,
513
+ "lwâwe": 256892,
514
+ "lwîsa": 256761,
515
+ "lwîshi": 256559,
516
+ "lyoshi": 256698,
517
+ "lyâbo": 256814,
518
+ "lyâge": 257016,
519
+ "lyâla": 256926,
520
+ "lyâni": 256829,
521
+ "lyâwe": 256794,
522
+ "lêrha": 256953,
523
+ "lîre": 257056,
524
+ "maboko": 256673,
525
+ "magana": 256591,
526
+ "mahanga": 256401,
527
+ "maintenant": 256213,
528
+ "makumi": 256641,
529
+ "maligo": 256693,
530
+ "manga": 257019,
531
+ "mango": 256990,
532
+ "manifest": 256254,
533
+ "manji": 256992,
534
+ "manyi": 256973,
535
+ "manyire": 256387,
536
+ "manyiri": 256382,
537
+ "manyî": 256804,
538
+ "manyîsa": 256391,
539
+ "manyîso": 256488,
540
+ "mashanja": 256311,
541
+ "masholo": 256380,
542
+ "mbaga": 256995,
543
+ "mbala": 256971,
544
+ "mbali": 257001,
545
+ "mbalo": 256869,
546
+ "mbira": 256795,
547
+ "mbire": 257018,
548
+ "mbwîra": 256671,
549
+ "mbûla": 256774,
550
+ "minjà": 256980,
551
+ "miyahu": 256503,
552
+ "montagnes": 256223,
553
+ "mubiri": 256607,
554
+ "mubwîra": 256470,
555
+ "mudâhwa": 256445,
556
+ "mugala": 256536,
557
+ "muguma": 256683,
558
+ "muhango": 256464,
559
+ "mukazi": 256535,
560
+ "mukolo": 256630,
561
+ "mukulu": 256648,
562
+ "mukâge": 256661,
563
+ "mulala": 256578,
564
+ "muliro": 256512,
565
+ "mulume": 256707,
566
+ "munâni": 256639,
567
+ "murha": 256975,
568
+ "murhambo": 256307,
569
+ "murhi": 256838,
570
+ "murhima": 256424,
571
+ "muzi": 257080,
572
+ "muzinda": 256471,
573
+ "mwambali": 256366,
574
+ "mwana": 256957,
575
+ "mwandu": 256611,
576
+ "mwanka": 256711,
577
+ "mweli": 256913,
578
+ "mwenda": 256596,
579
+ "mwene": 256974,
580
+ "mweshi": 256585,
581
+ "mwinjà": 256526,
582
+ "mwâge": 256839,
583
+ "mwâka": 256793,
584
+ "mwâli": 256888,
585
+ "mwâmi": 256873,
586
+ "mwâna": 256981,
587
+ "mwêzi": 256808,
588
+ "myâka": 256997,
589
+ "mâriya": 256507,
590
+ "mâshi": 257017,
591
+ "mîshi": 256812,
592
+ "nago": 257067,
593
+ "namu": 257081,
594
+ "nanka": 257023,
595
+ "natani": 256652,
596
+ "naye": 257083,
597
+ "ndakwo": 256686,
598
+ "ndarhu": 256717,
599
+ "nfune": 256821,
600
+ "ngabo": 257002,
601
+ "ngahi": 257015,
602
+ "ngasi": 256732,
603
+ "nguke": 256843,
604
+ "ngulu": 256782,
605
+ "nguma": 256917,
606
+ "ngunu": 257011,
607
+ "niene": 256978,
608
+ "ninyu": 257012,
609
+ "niono": 256759,
610
+ "njinjà": 256553,
611
+ "nkola": 256799,
612
+ "nkuba": 256746,
613
+ "nnina": 256788,
614
+ "nombreux": 256245,
615
+ "nourriture": 256221,
616
+ "nshi": 257047,
617
+ "nshonyi": 256433,
618
+ "nsiku": 256858,
619
+ "ntebe": 256903,
620
+ "nterekêro": 256279,
621
+ "ntondo": 256567,
622
+ "ntwâli": 256552,
623
+ "ntyâla": 256586,
624
+ "ntyôla": 256669,
625
+ "ntâco": 257003,
626
+ "ntâye": 256921,
627
+ "nyamuzinda": 256276,
628
+ "nyinji": 256672,
629
+ "nyumpa": 256592,
630
+ "nywa": 257044,
631
+ "nywesi": 256657,
632
+ "nênè": 257058,
633
+ "nîrhu": 256836,
634
+ "obubî": 256747,
635
+ "obuhanya": 256367,
636
+ "obuhashe": 256336,
637
+ "obuku": 256928,
638
+ "obukunizi": 256296,
639
+ "oburh": 256916,
640
+ "oburhimanya": 256262,
641
+ "oburhè": 256716,
642
+ "obushinganyanya": 256255,
643
+ "obusù": 256813,
644
+ "obuyêmêre": 256289,
645
+ "obuzîne": 256416,
646
+ "obwonjo": 256410,
647
+ "obwâmi": 256662,
648
+ "obwôla": 256685,
649
+ "obûko": 256742,
650
+ "offrandes": 256228,
651
+ "ogwêrhe": 256397,
652
+ "ogwôla": 256589,
653
+ "okuboko": 256374,
654
+ "okubà": 256826,
655
+ "okucî": 256929,
656
+ "okujira": 256477,
657
+ "okujà": 256767,
658
+ "okuli": 256872,
659
+ "okumu": 256809,
660
+ "okunali": 256384,
661
+ "okwôla": 256519,
662
+ "olubaga": 256493,
663
+ "olubanja": 256305,
664
+ "olufù": 256875,
665
+ "olugo": 256984,
666
+ "oluhêrero": 256286,
667
+ "olunda": 256569,
668
+ "olusiku": 256423,
669
+ "omubiri": 256396,
670
+ "omucîmba": 256317,
671
+ "omudâhwa": 256319,
672
+ "omugala": 256467,
673
+ "omugati": 256378,
674
+ "omuguma": 256393,
675
+ "omukazi": 256458,
676
+ "omukolo": 256450,
677
+ "omukò": 256885,
678
+ "omulala": 256468,
679
+ "omuli": 256896,
680
+ "omuliro": 256399,
681
+ "omulêbi": 256451,
682
+ "omurhima": 256352,
683
+ "omurhwe": 256379,
684
+ "omurhûla": 256313,
685
+ "omusi": 256967,
686
+ "omusisi": 256388,
687
+ "omwâmi": 256576,
688
+ "omwâna": 256587,
689
+ "omwêzi": 256593,
690
+ "omûka": 256866,
691
+ "orha": 257084,
692
+ "owabu": 256969,
693
+ "owundi": 256574,
694
+ "oyôla": 257026,
695
+ "pagan": 256870,
696
+ "pagani": 256709,
697
+ "paraître": 256248,
698
+ "population": 256219,
699
+ "prophète": 256251,
700
+ "prophètes": 256232,
701
+ "présenter": 256244,
702
+ "puissance": 256235,
703
+ "quarante": 256249,
704
+ "raheli": 256530,
705
+ "rdani": 256840,
706
+ "rekêro": 256568,
707
+ "rencontre": 256222,
708
+ "rhabana": 256429,
709
+ "rhabâ": 256791,
710
+ "rhale": 256943,
711
+ "rhali": 256764,
712
+ "rhambi": 256682,
713
+ "rhambo": 256640,
714
+ "rhana": 256801,
715
+ "rhang": 256728,
716
+ "rhangâ": 256654,
717
+ "rhanka": 256691,
718
+ "rhanu": 256762,
719
+ "rhanzi": 256506,
720
+ "rheganyi": 256346,
721
+ "rhege": 256737,
722
+ "rhegeka": 256466,
723
+ "rhegekaga": 256292,
724
+ "rhegeko": 256498,
725
+ "rhegesi": 256441,
726
+ "rhegesire": 256280,
727
+ "rhenga": 256719,
728
+ "rhengaga": 256320,
729
+ "rhenge": 256605,
730
+ "rhenza": 256722,
731
+ "rhera": 256734,
732
+ "rhere": 256850,
733
+ "rherek": 256695,
734
+ "rherekêra": 256294,
735
+ "rhima": 256955,
736
+ "rhimanya": 256325,
737
+ "rhira": 256966,
738
+ "rhond": 256741,
739
+ "rhondo": 256517,
740
+ "rhondêra": 256359,
741
+ "rhuli": 257006,
742
+ "rhuma": 256911,
743
+ "rhumaga": 256495,
744
+ "rhumi": 256905,
745
+ "rhumira": 256437,
746
+ "rhumire": 256453,
747
+ "rhumisi": 256381,
748
+ "rhundu": 256696,
749
+ "rhurha": 256655,
750
+ "rhweshi": 256502,
751
+ "rhwiri": 256635,
752
+ "rhînya": 256514,
753
+ "rhîre": 256902,
754
+ "rhôla": 256831,
755
+ "rhôle": 256820,
756
+ "rhûla": 256833,
757
+ "répondirent": 256208,
758
+ "sacrifice": 256239,
759
+ "sacrifices": 256215,
760
+ "sagya": 256787,
761
+ "sanctuaire": 256212,
762
+ "saranga": 256447,
763
+ "senge": 256930,
764
+ "sengero": 256385,
765
+ "serviteur": 256224,
766
+ "serviteurs": 256217,
767
+ "seulement": 256226,
768
+ "shagala": 256413,
769
+ "shambala": 256323,
770
+ "shambâ": 256625,
771
+ "shanda": 256555,
772
+ "shangi": 256603,
773
+ "shanja": 256624,
774
+ "sharhu": 256575,
775
+ "shekera": 256448,
776
+ "shenga": 256645,
777
+ "shetani": 256402,
778
+ "shimba": 256522,
779
+ "shingan": 256446,
780
+ "shinganyanya": 256257,
781
+ "shingâ": 256670,
782
+ "shingânîne": 256263,
783
+ "sholo": 256941,
784
+ "shombanyi": 256285,
785
+ "shonyi": 256533,
786
+ "shosi": 257024,
787
+ "shubi": 256912,
788
+ "shubira": 256478,
789
+ "shugu": 256863,
790
+ "shusho": 256531,
791
+ "shuza": 256823,
792
+ "shôkano": 256438,
793
+ "shûbû": 256771,
794
+ "shûli": 256972,
795
+ "sigala": 256642,
796
+ "sigîre": 256675,
797
+ "simba": 256877,
798
+ "singôno": 256483,
799
+ "sirhe": 256727,
800
+ "sirika": 256598,
801
+ "sirwe": 256735,
802
+ "sungunu": 256434,
803
+ "sôkera": 256565,
804
+ "sôme": 257087,
805
+ "tabâ": 257043,
806
+ "tabâro": 256527,
807
+ "tagatîfu": 256357,
808
+ "territoire": 256218,
809
+ "terusi": 256656,
810
+ "tumu": 257068,
811
+ "vraiment": 256250,
812
+ "vurha": 256865,
813
+ "vêtements": 256229,
814
+ "wanka": 256837,
815
+ "winja": 256942,
816
+ "winyu": 257007,
817
+ "wundi": 256998,
818
+ "wâge": 257040,
819
+ "wâli": 257057,
820
+ "wâni": 257076,
821
+ "wîrhu": 256927,
822
+ "yambala": 256398,
823
+ "yandaga": 256389,
824
+ "yandisirwe": 256275,
825
+ "yanja": 256898,
826
+ "yanka": 257021,
827
+ "yankirira": 256302,
828
+ "yanzi": 256770,
829
+ "yigîri": 256653,
830
+ "yigîriza": 256348,
831
+ "yiman": 256854,
832
+ "yimanga": 256454,
833
+ "yinyu": 256739,
834
+ "yisha": 256910,
835
+ "yishi": 256900,
836
+ "yishire": 256443,
837
+ "yoshi": 256933,
838
+ "yumva": 257013,
839
+ "yumvagya": 256345,
840
+ "yumve": 256938,
841
+ "yumvi": 256920,
842
+ "yumvî": 256915,
843
+ "yumvîrhe": 256332,
844
+ "yumvîrhi": 256322,
845
+ "yunju": 256940,
846
+ "yunjula": 256485,
847
+ "yunjwîre": 256338,
848
+ "yâkira": 256632,
849
+ "yâni": 257055,
850
+ "yêmêra": 256579,
851
+ "yêmêre": 256708,
852
+ "yênene": 256621,
853
+ "yêrek": 256880,
854
+ "yêreka": 256667,
855
+ "yêrekana": 256330,
856
+ "yêrekîre": 256309,
857
+ "yêshi": 256961,
858
+ "yîrha": 256845,
859
+ "yîrhe": 256768,
860
+ "yîrika": 256697,
861
+ "yîsh": 257086,
862
+ "yîshi": 256740,
863
+ "yôboha": 256659,
864
+ "yôla": 257062,
865
+ "yôrha": 256993,
866
+ "yôrhe": 256919,
867
+ "yôsire": 256510,
868
+ "yûbaka": 256520,
869
+ "yûbakaga": 256334,
870
+ "yûsh": 257042,
871
+ "yûshûla": 256480,
872
+ "zagya": 257020,
873
+ "zigire": 256577,
874
+ "zimya": 257028,
875
+ "zinyu": 256908,
876
+ "zirwe": 256784,
877
+ "zirya": 256847,
878
+ "zoshi": 256856,
879
+ "zungulu": 256375,
880
+ "zâbîbu": 256715,
881
+ "zâge": 257046,
882
+ "zâgi": 257059,
883
+ "également": 256238,
884
+ "étrangers": 256230,
885
+ "êreza": 257014,
886
+ "ônene": 256754
887
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14bb8dfb35c0ffdea7bc01e56cea38b9e3d5efcdcb9c251d6b40538e1aab555a
3
+ size 4852054
special_tokens_map.json ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "ace_Arab",
4
+ "ace_Latn",
5
+ "acm_Arab",
6
+ "acq_Arab",
7
+ "aeb_Arab",
8
+ "afr_Latn",
9
+ "ajp_Arab",
10
+ "aka_Latn",
11
+ "amh_Ethi",
12
+ "apc_Arab",
13
+ "arb_Arab",
14
+ "ars_Arab",
15
+ "ary_Arab",
16
+ "arz_Arab",
17
+ "asm_Beng",
18
+ "ast_Latn",
19
+ "awa_Deva",
20
+ "ayr_Latn",
21
+ "azb_Arab",
22
+ "azj_Latn",
23
+ "bak_Cyrl",
24
+ "bam_Latn",
25
+ "ban_Latn",
26
+ "bel_Cyrl",
27
+ "bem_Latn",
28
+ "ben_Beng",
29
+ "bho_Deva",
30
+ "bjn_Arab",
31
+ "bjn_Latn",
32
+ "bod_Tibt",
33
+ "bos_Latn",
34
+ "bug_Latn",
35
+ "bul_Cyrl",
36
+ "cat_Latn",
37
+ "ceb_Latn",
38
+ "ces_Latn",
39
+ "cjk_Latn",
40
+ "ckb_Arab",
41
+ "crh_Latn",
42
+ "cym_Latn",
43
+ "dan_Latn",
44
+ "deu_Latn",
45
+ "dik_Latn",
46
+ "dyu_Latn",
47
+ "dzo_Tibt",
48
+ "ell_Grek",
49
+ "eng_Latn",
50
+ "epo_Latn",
51
+ "est_Latn",
52
+ "eus_Latn",
53
+ "ewe_Latn",
54
+ "fao_Latn",
55
+ "pes_Arab",
56
+ "fij_Latn",
57
+ "fin_Latn",
58
+ "fon_Latn",
59
+ "fra_Latn",
60
+ "fur_Latn",
61
+ "fuv_Latn",
62
+ "gla_Latn",
63
+ "gle_Latn",
64
+ "glg_Latn",
65
+ "grn_Latn",
66
+ "guj_Gujr",
67
+ "hat_Latn",
68
+ "hau_Latn",
69
+ "heb_Hebr",
70
+ "hin_Deva",
71
+ "hne_Deva",
72
+ "hrv_Latn",
73
+ "hun_Latn",
74
+ "hye_Armn",
75
+ "ibo_Latn",
76
+ "ilo_Latn",
77
+ "ind_Latn",
78
+ "isl_Latn",
79
+ "ita_Latn",
80
+ "jav_Latn",
81
+ "jpn_Jpan",
82
+ "kab_Latn",
83
+ "kac_Latn",
84
+ "kam_Latn",
85
+ "kan_Knda",
86
+ "kas_Arab",
87
+ "kas_Deva",
88
+ "kat_Geor",
89
+ "knc_Arab",
90
+ "knc_Latn",
91
+ "kaz_Cyrl",
92
+ "kbp_Latn",
93
+ "kea_Latn",
94
+ "khm_Khmr",
95
+ "kik_Latn",
96
+ "kin_Latn",
97
+ "kir_Cyrl",
98
+ "kmb_Latn",
99
+ "kon_Latn",
100
+ "kor_Hang",
101
+ "kmr_Latn",
102
+ "lao_Laoo",
103
+ "lvs_Latn",
104
+ "lij_Latn",
105
+ "lim_Latn",
106
+ "lin_Latn",
107
+ "lit_Latn",
108
+ "lmo_Latn",
109
+ "ltg_Latn",
110
+ "ltz_Latn",
111
+ "lua_Latn",
112
+ "lug_Latn",
113
+ "luo_Latn",
114
+ "lus_Latn",
115
+ "mag_Deva",
116
+ "mai_Deva",
117
+ "mal_Mlym",
118
+ "mar_Deva",
119
+ "min_Latn",
120
+ "mkd_Cyrl",
121
+ "plt_Latn",
122
+ "mlt_Latn",
123
+ "mni_Beng",
124
+ "khk_Cyrl",
125
+ "mos_Latn",
126
+ "mri_Latn",
127
+ "zsm_Latn",
128
+ "mya_Mymr",
129
+ "nld_Latn",
130
+ "nno_Latn",
131
+ "nob_Latn",
132
+ "npi_Deva",
133
+ "nso_Latn",
134
+ "nus_Latn",
135
+ "nya_Latn",
136
+ "oci_Latn",
137
+ "gaz_Latn",
138
+ "ory_Orya",
139
+ "pag_Latn",
140
+ "pan_Guru",
141
+ "pap_Latn",
142
+ "pol_Latn",
143
+ "por_Latn",
144
+ "prs_Arab",
145
+ "pbt_Arab",
146
+ "quy_Latn",
147
+ "ron_Latn",
148
+ "run_Latn",
149
+ "rus_Cyrl",
150
+ "sag_Latn",
151
+ "san_Deva",
152
+ "sat_Beng",
153
+ "scn_Latn",
154
+ "shn_Mymr",
155
+ "sin_Sinh",
156
+ "slk_Latn",
157
+ "slv_Latn",
158
+ "smo_Latn",
159
+ "sna_Latn",
160
+ "snd_Arab",
161
+ "som_Latn",
162
+ "sot_Latn",
163
+ "spa_Latn",
164
+ "als_Latn",
165
+ "srd_Latn",
166
+ "srp_Cyrl",
167
+ "ssw_Latn",
168
+ "sun_Latn",
169
+ "swe_Latn",
170
+ "swh_Latn",
171
+ "szl_Latn",
172
+ "tam_Taml",
173
+ "tat_Cyrl",
174
+ "tel_Telu",
175
+ "tgk_Cyrl",
176
+ "tgl_Latn",
177
+ "tha_Thai",
178
+ "tir_Ethi",
179
+ "taq_Latn",
180
+ "taq_Tfng",
181
+ "tpi_Latn",
182
+ "tsn_Latn",
183
+ "tso_Latn",
184
+ "tuk_Latn",
185
+ "tum_Latn",
186
+ "tur_Latn",
187
+ "twi_Latn",
188
+ "tzm_Tfng",
189
+ "uig_Arab",
190
+ "ukr_Cyrl",
191
+ "umb_Latn",
192
+ "urd_Arab",
193
+ "uzn_Latn",
194
+ "vec_Latn",
195
+ "vie_Latn",
196
+ "war_Latn",
197
+ "wol_Latn",
198
+ "xho_Latn",
199
+ "ydd_Hebr",
200
+ "yor_Latn",
201
+ "yue_Hant",
202
+ "zho_Hans",
203
+ "zho_Hant",
204
+ "zul_Latn"
205
+ ],
206
+ "bos_token": {
207
+ "content": "<s>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ "cls_token": {
214
+ "content": "<s>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ "eos_token": {
221
+ "content": "</s>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ "mask_token": {
228
+ "content": "<mask>",
229
+ "lstrip": true,
230
+ "normalized": true,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ "pad_token": {
235
+ "content": "<pad>",
236
+ "lstrip": false,
237
+ "normalized": false,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ "sep_token": {
242
+ "content": "</s>",
243
+ "lstrip": false,
244
+ "normalized": false,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ "unk_token": {
249
+ "content": "<unk>",
250
+ "lstrip": false,
251
+ "normalized": false,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ }
255
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45257b5f8d89c216028da66428d96724a614f8d7b2c9c0370ec786c3208de5ff
3
+ size 32423756
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:709617ed317d36c6760fc8f492465b0c6da049dc3b48f43febacbceff61436ce
3
+ size 5496