fix out of vocab token
tokenization_interns1.py  +3 -1
```diff
@@ -893,7 +893,9 @@ class InternS1Tokenizer(Qwen2Tokenizer):
 
     def convert_tokens_to_string(self, tokens):
         """Converts a sequence of tokens (string) in a single string."""
-        text = "".join(tokens)
+        text = ""
+        for token in tokens:
+            text += token if token else ""
         text = text.replace(
             "▁", "Ġ"
         )  # This discrepancy stems from differing whitespace treatment in SentencePiece versus BPE tokenization.
```
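A minimal sketch of the failure mode this guards against, assuming (as in the `transformers` base tokenizer) that `convert_ids_to_tokens` can return `None` for an id missing from the vocabulary; the token list below is hypothetical:

```python
# Hypothetical token sequence: None stands in for an out-of-vocab id,
# which convert_ids_to_tokens may return in the transformers base class.
tokens = ["Hello", None, "Ġworld"]

# Old behavior: "".join(tokens) raises on the None entry.
try:
    text = "".join(tokens)
except TypeError as err:
    print(f"join fails: {err}")  # sequence item 1: expected str instance

# New behavior: accumulate tokens one by one, skipping falsy entries.
text = ""
for token in tokens:
    text += token if token else ""
print(text)  # -> HelloĠworld
```

Skipping falsy tokens rather than raising keeps decoding best-effort: an unknown id simply drops out of the output instead of aborting the whole `decode` call.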

