Nanobit committed on
Commit
8475e22
·
verified ·
1 Parent(s): cb52c66

fix: update pad token in tokenizer following upstream

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +1 -9
tokenizer_config.json CHANGED
@@ -9071,14 +9071,6 @@
9071
  "rstrip": false,
9072
  "single_word": false,
9073
  "special": true
9074
- },
9075
- "201134": {
9076
- "content": "<|finetune_right_pad_id|>",
9077
- "lstrip": false,
9078
- "normalized": false,
9079
- "rstrip": false,
9080
- "single_word": false,
9081
- "special": true
9082
  }
9083
  },
9084
  "bos_token": "<|begin_of_text|>",
@@ -9090,7 +9082,7 @@
9090
  "attention_mask"
9091
  ],
9092
  "model_max_length": 262144,
9093
- "pad_token": "<|finetune_right_pad_id|>",
9094
  "processor_class": "Llama4Processor",
9095
  "tokenizer_class": "PreTrainedTokenizer"
9096
  }
 
9071
  "rstrip": false,
9072
  "single_word": false,
9073
  "special": true
 
 
 
 
 
 
 
 
9074
  }
9075
  },
9076
  "bos_token": "<|begin_of_text|>",
 
9082
  "attention_mask"
9083
  ],
9084
  "model_max_length": 262144,
9085
+ "pad_token": "<|finetune_right_pad|>",
9086
  "processor_class": "Llama4Processor",
9087
  "tokenizer_class": "PreTrainedTokenizer"
9088
  }