aryaadhi committed on
Commit
3e814e6
·
verified ·
1 Parent(s): 988a73f

Best processor with eval_loss 4.6618

Browse files
Files changed (4) hide show
  1. added_tokens.json +14 -14
  2. special_tokens_map.json +14 -14
  3. tokenizer.json +14 -14
  4. tokenizer_config.json +18 -18
added_tokens.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "</s_agama>": 57554,
3
- "</s_alamat>": 57552,
4
- "</s_berlaku>": 57558,
5
- "</s_gol_darah>": 57550,
6
- "</s_jkel>": 57548,
7
- "</s_kabkota_issuer>": 57546,
8
- "</s_kecamatan>": 57556,
9
  "</s_kelurahan>": 57544,
10
  "</s_nama>": 57542,
11
  "</s_nik>": 57540,
@@ -16,14 +16,14 @@
16
  "</s_tgl_terbit>": 57530,
17
  "</s_ttl>": 57528,
18
  "</s_wn>": 57526,
19
- "<s_agama>": 57553,
20
- "<s_alamat>": 57551,
21
- "<s_berlaku>": 57557,
22
- "<s_gol_darah>": 57549,
23
  "<s_iitcdip>": 57523,
24
- "<s_jkel>": 57547,
25
- "<s_kabkota_issuer>": 57545,
26
- "<s_kecamatan>": 57555,
27
  "<s_kelurahan>": 57543,
28
  "<s_nama>": 57541,
29
  "<s_nik>": 57539,
 
1
  {
2
+ "</s_agama>": 57558,
3
+ "</s_alamat>": 57556,
4
+ "</s_berlaku>": 57554,
5
+ "</s_gol_darah>": 57552,
6
+ "</s_jkel>": 57550,
7
+ "</s_kabkota_issuer>": 57548,
8
+ "</s_kecamatan>": 57546,
9
  "</s_kelurahan>": 57544,
10
  "</s_nama>": 57542,
11
  "</s_nik>": 57540,
 
16
  "</s_tgl_terbit>": 57530,
17
  "</s_ttl>": 57528,
18
  "</s_wn>": 57526,
19
+ "<s_agama>": 57557,
20
+ "<s_alamat>": 57555,
21
+ "<s_berlaku>": 57553,
22
+ "<s_gol_darah>": 57551,
23
  "<s_iitcdip>": 57523,
24
+ "<s_jkel>": 57549,
25
+ "<s_kabkota_issuer>": 57547,
26
+ "<s_kecamatan>": 57545,
27
  "<s_kelurahan>": 57543,
28
  "<s_nama>": 57541,
29
  "<s_nik>": 57539,
special_tokens_map.json CHANGED
@@ -141,98 +141,98 @@
141
  "single_word": false
142
  },
143
  {
144
- "content": "<s_kabkota_issuer>",
145
  "lstrip": false,
146
  "normalized": false,
147
  "rstrip": false,
148
  "single_word": false
149
  },
150
  {
151
- "content": "</s_kabkota_issuer>",
152
  "lstrip": false,
153
  "normalized": false,
154
  "rstrip": false,
155
  "single_word": false
156
  },
157
  {
158
- "content": "<s_jkel>",
159
  "lstrip": false,
160
  "normalized": false,
161
  "rstrip": false,
162
  "single_word": false
163
  },
164
  {
165
- "content": "</s_jkel>",
166
  "lstrip": false,
167
  "normalized": false,
168
  "rstrip": false,
169
  "single_word": false
170
  },
171
  {
172
- "content": "<s_gol_darah>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
176
  "single_word": false
177
  },
178
  {
179
- "content": "</s_gol_darah>",
180
  "lstrip": false,
181
  "normalized": false,
182
  "rstrip": false,
183
  "single_word": false
184
  },
185
  {
186
- "content": "<s_alamat>",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
190
  "single_word": false
191
  },
192
  {
193
- "content": "</s_alamat>",
194
  "lstrip": false,
195
  "normalized": false,
196
  "rstrip": false,
197
  "single_word": false
198
  },
199
  {
200
- "content": "<s_agama>",
201
  "lstrip": false,
202
  "normalized": false,
203
  "rstrip": false,
204
  "single_word": false
205
  },
206
  {
207
- "content": "</s_agama>",
208
  "lstrip": false,
209
  "normalized": false,
210
  "rstrip": false,
211
  "single_word": false
212
  },
213
  {
214
- "content": "<s_kecamatan>",
215
  "lstrip": false,
216
  "normalized": false,
217
  "rstrip": false,
218
  "single_word": false
219
  },
220
  {
221
- "content": "</s_kecamatan>",
222
  "lstrip": false,
223
  "normalized": false,
224
  "rstrip": false,
225
  "single_word": false
226
  },
227
  {
228
- "content": "<s_berlaku>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
232
  "single_word": false
233
  },
234
  {
235
- "content": "</s_berlaku>",
236
  "lstrip": false,
237
  "normalized": false,
238
  "rstrip": false,
 
141
  "single_word": false
142
  },
143
  {
144
+ "content": "<s_kecamatan>",
145
  "lstrip": false,
146
  "normalized": false,
147
  "rstrip": false,
148
  "single_word": false
149
  },
150
  {
151
+ "content": "</s_kecamatan>",
152
  "lstrip": false,
153
  "normalized": false,
154
  "rstrip": false,
155
  "single_word": false
156
  },
157
  {
158
+ "content": "<s_kabkota_issuer>",
159
  "lstrip": false,
160
  "normalized": false,
161
  "rstrip": false,
162
  "single_word": false
163
  },
164
  {
165
+ "content": "</s_kabkota_issuer>",
166
  "lstrip": false,
167
  "normalized": false,
168
  "rstrip": false,
169
  "single_word": false
170
  },
171
  {
172
+ "content": "<s_jkel>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
176
  "single_word": false
177
  },
178
  {
179
+ "content": "</s_jkel>",
180
  "lstrip": false,
181
  "normalized": false,
182
  "rstrip": false,
183
  "single_word": false
184
  },
185
  {
186
+ "content": "<s_gol_darah>",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
190
  "single_word": false
191
  },
192
  {
193
+ "content": "</s_gol_darah>",
194
  "lstrip": false,
195
  "normalized": false,
196
  "rstrip": false,
197
  "single_word": false
198
  },
199
  {
200
+ "content": "<s_berlaku>",
201
  "lstrip": false,
202
  "normalized": false,
203
  "rstrip": false,
204
  "single_word": false
205
  },
206
  {
207
+ "content": "</s_berlaku>",
208
  "lstrip": false,
209
  "normalized": false,
210
  "rstrip": false,
211
  "single_word": false
212
  },
213
  {
214
+ "content": "<s_alamat>",
215
  "lstrip": false,
216
  "normalized": false,
217
  "rstrip": false,
218
  "single_word": false
219
  },
220
  {
221
+ "content": "</s_alamat>",
222
  "lstrip": false,
223
  "normalized": false,
224
  "rstrip": false,
225
  "single_word": false
226
  },
227
  {
228
+ "content": "<s_agama>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
232
  "single_word": false
233
  },
234
  {
235
+ "content": "</s_agama>",
236
  "lstrip": false,
237
  "normalized": false,
238
  "rstrip": false,
tokenizer.json CHANGED
@@ -271,7 +271,7 @@
271
  },
272
  {
273
  "id": 57545,
274
- "content": "<s_kabkota_issuer>",
275
  "single_word": false,
276
  "lstrip": false,
277
  "rstrip": false,
@@ -280,7 +280,7 @@
280
  },
281
  {
282
  "id": 57546,
283
- "content": "</s_kabkota_issuer>",
284
  "single_word": false,
285
  "lstrip": false,
286
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  },
290
  {
291
  "id": 57547,
292
- "content": "<s_jkel>",
293
  "single_word": false,
294
  "lstrip": false,
295
  "rstrip": false,
@@ -298,7 +298,7 @@
298
  },
299
  {
300
  "id": 57548,
301
- "content": "</s_jkel>",
302
  "single_word": false,
303
  "lstrip": false,
304
  "rstrip": false,
@@ -307,7 +307,7 @@
307
  },
308
  {
309
  "id": 57549,
310
- "content": "<s_gol_darah>",
311
  "single_word": false,
312
  "lstrip": false,
313
  "rstrip": false,
@@ -316,7 +316,7 @@
316
  },
317
  {
318
  "id": 57550,
319
- "content": "</s_gol_darah>",
320
  "single_word": false,
321
  "lstrip": false,
322
  "rstrip": false,
@@ -325,7 +325,7 @@
325
  },
326
  {
327
  "id": 57551,
328
- "content": "<s_alamat>",
329
  "single_word": false,
330
  "lstrip": false,
331
  "rstrip": false,
@@ -334,7 +334,7 @@
334
  },
335
  {
336
  "id": 57552,
337
- "content": "</s_alamat>",
338
  "single_word": false,
339
  "lstrip": false,
340
  "rstrip": false,
@@ -343,7 +343,7 @@
343
  },
344
  {
345
  "id": 57553,
346
- "content": "<s_agama>",
347
  "single_word": false,
348
  "lstrip": false,
349
  "rstrip": false,
@@ -352,7 +352,7 @@
352
  },
353
  {
354
  "id": 57554,
355
- "content": "</s_agama>",
356
  "single_word": false,
357
  "lstrip": false,
358
  "rstrip": false,
@@ -361,7 +361,7 @@
361
  },
362
  {
363
  "id": 57555,
364
- "content": "<s_kecamatan>",
365
  "single_word": false,
366
  "lstrip": false,
367
  "rstrip": false,
@@ -370,7 +370,7 @@
370
  },
371
  {
372
  "id": 57556,
373
- "content": "</s_kecamatan>",
374
  "single_word": false,
375
  "lstrip": false,
376
  "rstrip": false,
@@ -379,7 +379,7 @@
379
  },
380
  {
381
  "id": 57557,
382
- "content": "<s_berlaku>",
383
  "single_word": false,
384
  "lstrip": false,
385
  "rstrip": false,
@@ -388,7 +388,7 @@
388
  },
389
  {
390
  "id": 57558,
391
- "content": "</s_berlaku>",
392
  "single_word": false,
393
  "lstrip": false,
394
  "rstrip": false,
 
271
  },
272
  {
273
  "id": 57545,
274
+ "content": "<s_kecamatan>",
275
  "single_word": false,
276
  "lstrip": false,
277
  "rstrip": false,
 
280
  },
281
  {
282
  "id": 57546,
283
+ "content": "</s_kecamatan>",
284
  "single_word": false,
285
  "lstrip": false,
286
  "rstrip": false,
 
289
  },
290
  {
291
  "id": 57547,
292
+ "content": "<s_kabkota_issuer>",
293
  "single_word": false,
294
  "lstrip": false,
295
  "rstrip": false,
 
298
  },
299
  {
300
  "id": 57548,
301
+ "content": "</s_kabkota_issuer>",
302
  "single_word": false,
303
  "lstrip": false,
304
  "rstrip": false,
 
307
  },
308
  {
309
  "id": 57549,
310
+ "content": "<s_jkel>",
311
  "single_word": false,
312
  "lstrip": false,
313
  "rstrip": false,
 
316
  },
317
  {
318
  "id": 57550,
319
+ "content": "</s_jkel>",
320
  "single_word": false,
321
  "lstrip": false,
322
  "rstrip": false,
 
325
  },
326
  {
327
  "id": 57551,
328
+ "content": "<s_gol_darah>",
329
  "single_word": false,
330
  "lstrip": false,
331
  "rstrip": false,
 
334
  },
335
  {
336
  "id": 57552,
337
+ "content": "</s_gol_darah>",
338
  "single_word": false,
339
  "lstrip": false,
340
  "rstrip": false,
 
343
  },
344
  {
345
  "id": 57553,
346
+ "content": "<s_berlaku>",
347
  "single_word": false,
348
  "lstrip": false,
349
  "rstrip": false,
 
352
  },
353
  {
354
  "id": 57554,
355
+ "content": "</s_berlaku>",
356
  "single_word": false,
357
  "lstrip": false,
358
  "rstrip": false,
 
361
  },
362
  {
363
  "id": 57555,
364
+ "content": "<s_alamat>",
365
  "single_word": false,
366
  "lstrip": false,
367
  "rstrip": false,
 
370
  },
371
  {
372
  "id": 57556,
373
+ "content": "</s_alamat>",
374
  "single_word": false,
375
  "lstrip": false,
376
  "rstrip": false,
 
379
  },
380
  {
381
  "id": 57557,
382
+ "content": "<s_agama>",
383
  "single_word": false,
384
  "lstrip": false,
385
  "rstrip": false,
 
388
  },
389
  {
390
  "id": 57558,
391
+ "content": "</s_agama>",
392
  "single_word": false,
393
  "lstrip": false,
394
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -225,7 +225,7 @@
225
  "special": true
226
  },
227
  "57545": {
228
- "content": "<s_kabkota_issuer>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
@@ -233,7 +233,7 @@
233
  "special": true
234
  },
235
  "57546": {
236
- "content": "</s_kabkota_issuer>",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
@@ -241,7 +241,7 @@
241
  "special": true
242
  },
243
  "57547": {
244
- "content": "<s_jkel>",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
@@ -249,7 +249,7 @@
249
  "special": true
250
  },
251
  "57548": {
252
- "content": "</s_jkel>",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
@@ -257,7 +257,7 @@
257
  "special": true
258
  },
259
  "57549": {
260
- "content": "<s_gol_darah>",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
@@ -265,7 +265,7 @@
265
  "special": true
266
  },
267
  "57550": {
268
- "content": "</s_gol_darah>",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
@@ -273,7 +273,7 @@
273
  "special": true
274
  },
275
  "57551": {
276
- "content": "<s_alamat>",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
@@ -281,7 +281,7 @@
281
  "special": true
282
  },
283
  "57552": {
284
- "content": "</s_alamat>",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  "special": true
290
  },
291
  "57553": {
292
- "content": "<s_agama>",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
@@ -297,7 +297,7 @@
297
  "special": true
298
  },
299
  "57554": {
300
- "content": "</s_agama>",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
@@ -305,7 +305,7 @@
305
  "special": true
306
  },
307
  "57555": {
308
- "content": "<s_kecamatan>",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
@@ -313,7 +313,7 @@
313
  "special": true
314
  },
315
  "57556": {
316
- "content": "</s_kecamatan>",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
@@ -321,7 +321,7 @@
321
  "special": true
322
  },
323
  "57557": {
324
- "content": "<s_berlaku>",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
@@ -329,7 +329,7 @@
329
  "special": true
330
  },
331
  "57558": {
332
- "content": "</s_berlaku>",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
@@ -358,20 +358,20 @@
358
  "</s_nama>",
359
  "<s_kelurahan>",
360
  "</s_kelurahan>",
 
 
361
  "<s_kabkota_issuer>",
362
  "</s_kabkota_issuer>",
363
  "<s_jkel>",
364
  "</s_jkel>",
365
  "<s_gol_darah>",
366
  "</s_gol_darah>",
 
 
367
  "<s_alamat>",
368
  "</s_alamat>",
369
  "<s_agama>",
370
  "</s_agama>",
371
- "<s_kecamatan>",
372
- "</s_kecamatan>",
373
- "<s_berlaku>",
374
- "</s_berlaku>",
375
  "<s>",
376
  "</s>"
377
  ],
 
225
  "special": true
226
  },
227
  "57545": {
228
+ "content": "<s_kecamatan>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
 
233
  "special": true
234
  },
235
  "57546": {
236
+ "content": "</s_kecamatan>",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
 
241
  "special": true
242
  },
243
  "57547": {
244
+ "content": "<s_kabkota_issuer>",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
 
249
  "special": true
250
  },
251
  "57548": {
252
+ "content": "</s_kabkota_issuer>",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
 
257
  "special": true
258
  },
259
  "57549": {
260
+ "content": "<s_jkel>",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
 
265
  "special": true
266
  },
267
  "57550": {
268
+ "content": "</s_jkel>",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
 
273
  "special": true
274
  },
275
  "57551": {
276
+ "content": "<s_gol_darah>",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
 
281
  "special": true
282
  },
283
  "57552": {
284
+ "content": "</s_gol_darah>",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
 
289
  "special": true
290
  },
291
  "57553": {
292
+ "content": "<s_berlaku>",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
 
297
  "special": true
298
  },
299
  "57554": {
300
+ "content": "</s_berlaku>",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
 
305
  "special": true
306
  },
307
  "57555": {
308
+ "content": "<s_alamat>",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
 
313
  "special": true
314
  },
315
  "57556": {
316
+ "content": "</s_alamat>",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
 
321
  "special": true
322
  },
323
  "57557": {
324
+ "content": "<s_agama>",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
 
329
  "special": true
330
  },
331
  "57558": {
332
+ "content": "</s_agama>",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
 
358
  "</s_nama>",
359
  "<s_kelurahan>",
360
  "</s_kelurahan>",
361
+ "<s_kecamatan>",
362
+ "</s_kecamatan>",
363
  "<s_kabkota_issuer>",
364
  "</s_kabkota_issuer>",
365
  "<s_jkel>",
366
  "</s_jkel>",
367
  "<s_gol_darah>",
368
  "</s_gol_darah>",
369
+ "<s_berlaku>",
370
+ "</s_berlaku>",
371
  "<s_alamat>",
372
  "</s_alamat>",
373
  "<s_agama>",
374
  "</s_agama>",
 
 
 
 
375
  "<s>",
376
  "</s>"
377
  ],