update tokenizer for compatibility with new transformers
padding_side added to _pad method signature in transformers 4.45
- tokenization_chatglm.py +3 -0
tokenization_chatglm.py
CHANGED
|
@@ -271,6 +271,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 271 |
padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
|
| 272 |
pad_to_multiple_of: Optional[int] = None,
|
| 273 |
return_attention_mask: Optional[bool] = None,
|
|
|
|
|
|
|
| 274 |
) -> dict:
|
| 275 |
"""
|
| 276 |
Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
|
|
@@ -297,6 +299,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 297 |
"""
|
| 298 |
# Load from model defaults
|
| 299 |
assert self.padding_side == "left"
|
|
|
|
| 300 |
|
| 301 |
required_input = encoded_inputs[self.model_input_names[0]]
|
| 302 |
seq_length = len(required_input)
|
|
|
|
| 271 |
padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
|
| 272 |
pad_to_multiple_of: Optional[int] = None,
|
| 273 |
return_attention_mask: Optional[bool] = None,
|
| 274 |
+
padding_side: Optional[bool] = None,
|
| 275 |
+
**kwargs
|
| 276 |
) -> dict:
|
| 277 |
"""
|
| 278 |
Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
|
|
|
|
| 299 |
"""
|
| 300 |
# Load from model defaults
|
| 301 |
assert self.padding_side == "left"
|
| 302 |
+
assert padding_side is None or padding_side == "left"
|
| 303 |
|
| 304 |
required_input = encoded_inputs[self.model_input_names[0]]
|
| 305 |
seq_length = len(required_input)
|