Commit
·
981291b
1
Parent(s):
ce0a59a
Update tokenization_rwkv_world.py
Browse files
tokenization_rwkv_world.py
CHANGED
|
@@ -281,7 +281,11 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
|
|
| 281 |
)
|
| 282 |
|
| 283 |
with open(vocab_file, "w", encoding="utf-8") as f:
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
return (vocab_file,)
|
| 287 |
|
|
|
|
| 281 |
)
|
| 282 |
|
| 283 |
with open(vocab_file, "w", encoding="utf-8") as f:
|
| 284 |
+
for idx, x in self.encoder.items():
|
| 285 |
+
if isinstance(x, str):
|
| 286 |
+
x = x.decode("utf-8")
|
| 287 |
+
line = f"{idx} {repr(x)} {len(x)}\n"
|
| 288 |
+
f.write(line)
|
| 289 |
|
| 290 |
return (vocab_file,)
|
| 291 |
|