fixes.
- .gitignore +1 -0
- README.md +5 -2
- test.py +1 -0
.gitignore
CHANGED
@@ -1,3 +1,4 @@
 ckpt/
 *.tar.gz
 *.swp
+pya0
README.md
CHANGED
@@ -26,8 +26,11 @@ Download your tokenizer, model checkpoints, and optionally the training logs (`e
 
 Optionally, test model using the MLM task:
 ```sh
-pip install pya0
-
+pip install pya0 # for math token preprocessing
+# testing local checkpoints:
+python test.py ./ckpt/math-tokenizer ./ckpt/2-2-0/encoder.ckpt
+# testing Model Hub checkpoints:
+python test.py approach0/coco-mae-220 approach0/coco-mae-220
 ```
 > **Note**
 > Modify the test examples in `test.txt` to play with it.
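For context, the two new `python test.py ...` invocations pass the same positional pair that `test(tokenizer_name_or_path, model_name_or_path, ...)` accepts: a tokenizer path and a checkpoint path, which for the Model Hub case are one and the same repo id. A minimal sketch of the equivalent direct usage, assuming `approach0/coco-mae-220` is a BERT-style masked-LM checkpoint loadable through Hugging Face `transformers` (an assumption; test.py may wrap this differently):

```python
# Hypothetical sketch -- not the repository's actual test.py logic.
# Assumes approach0/coco-mae-220 is a BERT-style MLM checkpoint (assumption).
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained('approach0/coco-mae-220')
model = AutoModelForMaskedLM.from_pretrained('approach0/coco-mae-220')

sentence = 'the quadratic formula solves [MASK] equations'
inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors='pt')

with torch.no_grad():
    logits = model(**inputs).logits

# Report the top prediction at each [MASK] position.
mask_positions = (inputs['input_ids'][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
for pos in mask_positions:
    top_id = logits[0, pos].argmax().item()
    print(pos.item(), tokenizer.decode([top_id]))
```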
test.py
CHANGED
@@ -43,6 +43,7 @@ def test(tokenizer_name_or_path, model_name_or_path, test_file='test.txt'):
     for pos in filter(lambda x: x!=0, maskpos):
         tokens[pos-1] = '[MASK]'
     sentence = ' '.join(tokens)
+    sentence = sentence.replace('[mask]', '[MASK]')
     tokens = tokenizer(sentence,
         padding=True, truncation=True, return_tensors="pt")
     #print(tokenizer.decode(tokens['input_ids'][0]))
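The added `replace('[mask]', '[MASK]')` guards against lowercased mask markers in the test sentences: tokenizers match special tokens case-sensitively, so a lowercase `[mask]` is split into ordinary word pieces instead of being mapped to the mask token. A minimal sketch of the failure mode, using `bert-base-uncased` as a stand-in tokenizer (an assumption; the repository ships its own `math-tokenizer`):

```python
# Sketch of why the normalization matters; bert-base-uncased is a stand-in
# for the repository's math-tokenizer (assumption).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

lowered = 'the integral of [mask] dx'
normalized = lowered.replace('[mask]', '[MASK]')  # the fix applied in test.py

# Lowercase variant is split into plain word pieces, e.g. '[', 'mask', ']'
print(tokenizer.tokenize(lowered))
# Normalized variant keeps the special token '[MASK]'
print(tokenizer.tokenize(normalized))
```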