RangiLyu committed
Commit a6c5212 · verified · 1 Parent(s): d790aca

fix out of vocab token

Files changed (1)
  1. tokenization_interns1.py +3 -1
tokenization_interns1.py CHANGED
@@ -893,7 +893,9 @@ class InternS1Tokenizer(Qwen2Tokenizer):
 
     def convert_tokens_to_string(self, tokens):
         """Converts a sequence of tokens (string) in a single string."""
-        text = "".join(tokens)
+        text = ""
+        for token in tokens:
+            text += token if token else ""
         text = text.replace(
             "▁", "Ġ"
         )  # This discrepancy stems from differing whitespace treatment in SentencePiece versus BPE tokenization.
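
For context, a minimal sketch of the failure mode this commit addresses. The token list below is hypothetical, standing in for what convert_ids_to_tokens can return when an id falls outside the vocabulary and comes back as None: str.join raises a TypeError on the None entry, while the patched loop simply skips it.

# Minimal sketch, not part of the repository: an out-of-vocab id converted to
# None breaks "".join(tokens); the new accumulation loop skips such entries.
tokens = ["Hello", "▁world", None]  # hypothetical sequence with an OOV entry

# Previous behavior: "".join cannot handle a None element.
try:
    text = "".join(tokens)
except TypeError as err:
    print(f"old code fails: {err}")

# Patched behavior: skip falsy tokens (None or empty string), then map the
# SentencePiece whitespace marker to the BPE one, as in the diff above.
text = ""
for token in tokens:
    text += token if token else ""
text = text.replace("▁", "Ġ")
print(text)  # -> HelloĠworld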