joanrodai committed on
Commit
ac1e92e
·
verified ·
1 Parent(s): 9673981

Add custom processor

Browse files
Files changed (4) hide show
  1. processor_config.json +14 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +2 -0
  4. vocab.json +0 -0
processor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mean": [
3
+ 0.48145466,
4
+ 0.4578275,
5
+ 0.40821073
6
+ ],
7
+ "processor_class": "SimpleStarVectorProcessor",
8
+ "size": 224,
9
+ "std": [
10
+ 0.26862954,
11
+ 0.26130258,
12
+ 0.27577711
13
+ ]
14
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -212,8 +212,10 @@
212
  "clean_up_tokenization_spaces": true,
213
  "eos_token": "<|endoftext|>",
214
  "errors": "replace",
 
215
  "model_max_length": 1000000000000000019884624838656,
216
  "pad_token": "[PAD]",
 
217
  "tokenizer_class": "GPT2Tokenizer",
218
  "unk_token": "<|endoftext|>",
219
  "vocab_size": 49152
 
212
  "clean_up_tokenization_spaces": true,
213
  "eos_token": "<|endoftext|>",
214
  "errors": "replace",
215
+ "extra_special_tokens": {},
216
  "model_max_length": 1000000000000000019884624838656,
217
  "pad_token": "[PAD]",
218
+ "processor_class": "SimpleStarVectorProcessor",
219
  "tokenizer_class": "GPT2Tokenizer",
220
  "unk_token": "<|endoftext|>",
221
  "vocab_size": 49152
vocab.json CHANGED
The diff for this file is too large to render. See raw diff