Commit
·
b5bfb10
1
Parent(s):
57098db
Update README.md
Browse files
README.md
CHANGED
|
@@ -147,6 +147,8 @@ Training data can be provided upon request.
|
|
| 147 |
|
| 148 |
- **Generation**
|
| 149 |
|
|
|
|
|
|
|
| 150 |
```
|
| 151 |
from transformers import M2M100ForConditionalGeneration
|
| 152 |
from tokenization_small100 import SMALL100Tokenizer
|
|
@@ -160,14 +162,14 @@ tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
|
|
| 160 |
# translate Hindi to French
|
| 161 |
tokenizer.tgt_lang = "fr"
|
| 162 |
encoded_hi = tokenizer(hi_text, return_tensors="pt")
|
| 163 |
-
generated_tokens = model.generate(**encoded_hi)
|
| 164 |
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
| 165 |
# => "La vie est comme une boîte de chocolat."
|
| 166 |
|
| 167 |
# translate Chinese to English
|
| 168 |
tokenizer.tgt_lang = "en"
|
| 169 |
encoded_zh = tokenizer(chinese_text, return_tensors="pt")
|
| 170 |
-
generated_tokens = model.generate(**encoded_zh)
|
| 171 |
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
| 172 |
# => "Life is like a box of chocolate."
|
| 173 |
```
|
|
|
|
| 147 |
|
| 148 |
- **Generation**
|
| 149 |
|
| 150 |
+
A beam size of 5 and a maximum target length of 256 are used for generation.
|
| 151 |
+
|
| 152 |
```
|
| 153 |
from transformers import M2M100ForConditionalGeneration
|
| 154 |
from tokenization_small100 import SMALL100Tokenizer
|
|
|
|
| 162 |
# translate Hindi to French
|
| 163 |
tokenizer.tgt_lang = "fr"
|
| 164 |
encoded_hi = tokenizer(hi_text, return_tensors="pt")
|
| 165 |
+
generated_tokens = model.generate(**encoded_hi, max_length=256, num_beams=5)
|
| 166 |
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
| 167 |
# => "La vie est comme une boîte de chocolat."
|
| 168 |
|
| 169 |
# translate Chinese to English
|
| 170 |
tokenizer.tgt_lang = "en"
|
| 171 |
encoded_zh = tokenizer(chinese_text, return_tensors="pt")
|
| 172 |
+
generated_tokens = model.generate(**encoded_zh, max_length=256, num_beams=5)
|
| 173 |
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
| 174 |
# => "Life is like a box of chocolate."
|
| 175 |
```
|