fix out of vocab token
tokenization_interns1.py  +3 -1
```diff
@@ -893,7 +893,9 @@ class InternS1Tokenizer(Qwen2Tokenizer):
 
     def convert_tokens_to_string(self, tokens):
         """Converts a sequence of tokens (string) in a single string."""
-        text = "".join(tokens)
+        text = ""
+        for token in tokens:
+            text += token if token else ""
         text = text.replace(
             "▁", "Ġ"
         )  # This discrepancy stems from differing whitespace treatment in SentencePiece versus BPE tokenization.
```
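A minimal sketch of the failure mode this guards against, assuming (as in the `transformers` base tokenizer) that `convert_ids_to_tokens` can return `None` for an id missing from the vocabulary; the token list below is hypothetical:

```python
# Hypothetical token sequence: None stands in for an out-of-vocab id,
# which convert_ids_to_tokens may return in the transformers base class.
tokens = ["Hello", None, "Ġworld"]

# Old behavior: "".join(tokens) raises on the None entry.
try:
    text = "".join(tokens)
except TypeError as err:
    print(f"join fails: {err}")  # sequence item 1: expected str instance

# New behavior: accumulate tokens one by one, skipping falsy entries.
text = ""
for token in tokens:
    text += token if token else ""
print(text)  # -> HelloĠworld
```

Skipping falsy tokens rather than raising keeps decoding best-effort: an unknown id simply drops out of the output instead of aborting the whole `decode` call.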

