Pclanglais committed on
Commit
c290319
·
verified ·
1 Parent(s): f6a63fc

Upload folder using huggingface_hub

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c1e468ff18a8a93027bf970cc9267c6ee62d2719edbc5329d9be198d98fb674
3
  size 2390960584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc0df86988365766bd8ccb29f2cfaf1d4efaefd7a909f32161a270b2f280d9d
3
  size 2390960584
special_tokens_map.json CHANGED
@@ -2,18 +2,22 @@
2
  "additional_special_tokens": [
3
  "<|query_start|>",
4
  "<|query_end|>",
 
5
  "<|source_id|>",
 
 
 
6
  "<|query_analysis_start|>",
7
  "<|query_analysis_end|>",
8
  "<|query_report_start|>",
9
  "<|query_report_end|>",
10
  "<|source_analysis_start|>",
11
  "<|source_analysis_end|>",
 
 
12
  "<|draft_start|>",
13
  "<|draft_end|>",
14
  "<|answer_start|>",
15
- "<|answer_end|>",
16
- "<|source_start|>",
17
- "<|source_end|>"
18
  ]
19
  }
 
2
  "additional_special_tokens": [
3
  "<|query_start|>",
4
  "<|query_end|>",
5
+ "<|source_start|>",
6
  "<|source_id|>",
7
+ "<|source_end|>",
8
+ "<|language_start|>",
9
+ "<|language_end|>",
10
  "<|query_analysis_start|>",
11
  "<|query_analysis_end|>",
12
  "<|query_report_start|>",
13
  "<|query_report_end|>",
14
  "<|source_analysis_start|>",
15
  "<|source_analysis_end|>",
16
+ "<|source_report_start|>",
17
+ "<|source_report_end|>",
18
  "<|draft_start|>",
19
  "<|draft_end|>",
20
  "<|answer_start|>",
21
+ "<|answer_end|>"
 
 
22
  ]
23
  }
tokenizer.json CHANGED
@@ -40,7 +40,7 @@
40
  "special": true
41
  },
42
  {
43
- "id": 65521,
44
  "content": "<|query_start|>",
45
  "single_word": false,
46
  "lstrip": false,
@@ -49,7 +49,7 @@
49
  "special": true
50
  },
51
  {
52
- "id": 65522,
53
  "content": "<|query_end|>",
54
  "single_word": false,
55
  "lstrip": false,
@@ -58,7 +58,16 @@
58
  "special": true
59
  },
60
  {
61
- "id": 65523,
 
 
 
 
 
 
 
 
 
62
  "content": "<|source_id|>",
63
  "single_word": false,
64
  "lstrip": false,
@@ -66,6 +75,33 @@
66
  "normalized": false,
67
  "special": true
68
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  {
70
  "id": 65524,
71
  "content": "<|query_analysis_start|>",
@@ -122,7 +158,7 @@
122
  },
123
  {
124
  "id": 65530,
125
- "content": "<|draft_start|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,7 +167,7 @@
131
  },
132
  {
133
  "id": 65531,
134
- "content": "<|draft_end|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
@@ -140,7 +176,7 @@
140
  },
141
  {
142
  "id": 65532,
143
- "content": "<|answer_start|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
@@ -149,7 +185,7 @@
149
  },
150
  {
151
  "id": 65533,
152
- "content": "<|answer_end|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
@@ -158,7 +194,7 @@
158
  },
159
  {
160
  "id": 65534,
161
- "content": "<|source_start|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
@@ -167,7 +203,7 @@
167
  },
168
  {
169
  "id": 65535,
170
- "content": "<|source_end|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
@@ -195,7 +231,98 @@
195
  }
196
  ]
197
  },
198
- "post_processor": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  "decoder": {
200
  "type": "ByteLevel",
201
  "add_prefix_space": true,
@@ -65728,11 +65855,7 @@
65728
  "Ġ709": 65513,
65729
  "Ġménages": 65514,
65730
  "Ġprodukty": 65515,
65731
- "Ġsuum": 65516,
65732
- "Ġinfluenc": 65517,
65733
- "Ġcatching": 65518,
65734
- "ĠÏĮÏĦαν": 65519,
65735
- "737": 65520
65736
  },
65737
  "merges": [
65738
  "Ġ Ġ",
@@ -131004,11 +131127,7 @@
131004
  "Ġ7 09",
131005
  "Ġmén ages",
131006
  "Ġproduk ty",
131007
- "Ġsu um",
131008
- "Ġinflu enc",
131009
- "Ġcatch ing",
131010
- "ĠÏĮ ÏĦαν",
131011
- "7 37"
131012
  ]
131013
  }
131014
  }
 
40
  "special": true
41
  },
42
  {
43
+ "id": 65517,
44
  "content": "<|query_start|>",
45
  "single_word": false,
46
  "lstrip": false,
 
49
  "special": true
50
  },
51
  {
52
+ "id": 65518,
53
  "content": "<|query_end|>",
54
  "single_word": false,
55
  "lstrip": false,
 
58
  "special": true
59
  },
60
  {
61
+ "id": 65519,
62
+ "content": "<|source_start|>",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 65520,
71
  "content": "<|source_id|>",
72
  "single_word": false,
73
  "lstrip": false,
 
75
  "normalized": false,
76
  "special": true
77
  },
78
+ {
79
+ "id": 65521,
80
+ "content": "<|source_end|>",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 65522,
89
+ "content": "<|language_start|>",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 65523,
98
+ "content": "<|language_end|>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
  {
106
  "id": 65524,
107
  "content": "<|query_analysis_start|>",
 
158
  },
159
  {
160
  "id": 65530,
161
+ "content": "<|source_report_start|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
 
167
  },
168
  {
169
  "id": 65531,
170
+ "content": "<|source_report_end|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
 
176
  },
177
  {
178
  "id": 65532,
179
+ "content": "<|draft_start|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
 
185
  },
186
  {
187
  "id": 65533,
188
+ "content": "<|draft_end|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
 
194
  },
195
  {
196
  "id": 65534,
197
+ "content": "<|answer_start|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
 
203
  },
204
  {
205
  "id": 65535,
206
+ "content": "<|answer_end|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
 
231
  }
232
  ]
233
  },
234
+ "post_processor": {
235
+ "type": "Sequence",
236
+ "processors": [
237
+ {
238
+ "type": "ByteLevel",
239
+ "add_prefix_space": true,
240
+ "trim_offsets": false,
241
+ "use_regex": true
242
+ },
243
+ {
244
+ "type": "TemplateProcessing",
245
+ "single": [
246
+ {
247
+ "SpecialToken": {
248
+ "id": "<|end_of_text|>",
249
+ "type_id": 0
250
+ }
251
+ },
252
+ {
253
+ "Sequence": {
254
+ "id": "A",
255
+ "type_id": 0
256
+ }
257
+ }
258
+ ],
259
+ "pair": [
260
+ {
261
+ "SpecialToken": {
262
+ "id": "<|end_of_text|>",
263
+ "type_id": 0
264
+ }
265
+ },
266
+ {
267
+ "Sequence": {
268
+ "id": "A",
269
+ "type_id": 0
270
+ }
271
+ },
272
+ {
273
+ "SpecialToken": {
274
+ "id": "<|end_of_text|>",
275
+ "type_id": 0
276
+ }
277
+ },
278
+ {
279
+ "Sequence": {
280
+ "id": "B",
281
+ "type_id": 0
282
+ }
283
+ }
284
+ ],
285
+ "special_tokens": {
286
+ "<|begin_of_text|>": {
287
+ "id": "<|begin_of_text|>",
288
+ "ids": [
289
+ 1
290
+ ],
291
+ "tokens": [
292
+ "<|begin_of_text|>"
293
+ ]
294
+ },
295
+ "<|end_of_text|>": {
296
+ "id": "<|end_of_text|>",
297
+ "ids": [
298
+ 2
299
+ ],
300
+ "tokens": [
301
+ "<|end_of_text|>"
302
+ ]
303
+ },
304
+ "[PAD]": {
305
+ "id": "[PAD]",
306
+ "ids": [
307
+ 3
308
+ ],
309
+ "tokens": [
310
+ "[PAD]"
311
+ ]
312
+ },
313
+ "[UNK]": {
314
+ "id": "[UNK]",
315
+ "ids": [
316
+ 0
317
+ ],
318
+ "tokens": [
319
+ "[UNK]"
320
+ ]
321
+ }
322
+ }
323
+ }
324
+ ]
325
+ },
326
  "decoder": {
327
  "type": "ByteLevel",
328
  "add_prefix_space": true,
 
65855
  "Ġ709": 65513,
65856
  "Ġménages": 65514,
65857
  "Ġprodukty": 65515,
65858
+ "Ġsuum": 65516
 
 
 
 
65859
  },
65860
  "merges": [
65861
  "Ġ Ġ",
 
131127
  "Ġ7 09",
131128
  "Ġmén ages",
131129
  "Ġproduk ty",
131130
+ "Ġsu um"
 
 
 
 
131131
  ]
131132
  }
131133
  }
tokenizer_config.json CHANGED
@@ -32,7 +32,7 @@
32
  "single_word": false,
33
  "special": true
34
  },
35
- "65521": {
36
  "content": "<|query_start|>",
37
  "lstrip": false,
38
  "normalized": false,
@@ -40,7 +40,7 @@
40
  "single_word": false,
41
  "special": true
42
  },
43
- "65522": {
44
  "content": "<|query_end|>",
45
  "lstrip": false,
46
  "normalized": false,
@@ -48,7 +48,15 @@
48
  "single_word": false,
49
  "special": true
50
  },
51
- "65523": {
 
 
 
 
 
 
 
 
52
  "content": "<|source_id|>",
53
  "lstrip": false,
54
  "normalized": false,
@@ -56,6 +64,30 @@
56
  "single_word": false,
57
  "special": true
58
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "65524": {
60
  "content": "<|query_analysis_start|>",
61
  "lstrip": false,
@@ -105,7 +137,7 @@
105
  "special": true
106
  },
107
  "65530": {
108
- "content": "<|draft_start|>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
@@ -113,7 +145,7 @@
113
  "special": true
114
  },
115
  "65531": {
116
- "content": "<|draft_end|>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
@@ -121,7 +153,7 @@
121
  "special": true
122
  },
123
  "65532": {
124
- "content": "<|answer_start|>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
@@ -129,7 +161,7 @@
129
  "special": true
130
  },
131
  "65533": {
132
- "content": "<|answer_end|>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
@@ -137,7 +169,7 @@
137
  "special": true
138
  },
139
  "65534": {
140
- "content": "<|source_start|>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
@@ -145,7 +177,7 @@
145
  "special": true
146
  },
147
  "65535": {
148
- "content": "<|source_end|>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
@@ -156,19 +188,23 @@
156
  "additional_special_tokens": [
157
  "<|query_start|>",
158
  "<|query_end|>",
 
159
  "<|source_id|>",
 
 
 
160
  "<|query_analysis_start|>",
161
  "<|query_analysis_end|>",
162
  "<|query_report_start|>",
163
  "<|query_report_end|>",
164
  "<|source_analysis_start|>",
165
  "<|source_analysis_end|>",
 
 
166
  "<|draft_start|>",
167
  "<|draft_end|>",
168
  "<|answer_start|>",
169
- "<|answer_end|>",
170
- "<|source_start|>",
171
- "<|source_end|>"
172
  ],
173
  "clean_up_tokenization_spaces": true,
174
  "model_max_length": 1000000000000000019884624838656,
 
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "65517": {
36
  "content": "<|query_start|>",
37
  "lstrip": false,
38
  "normalized": false,
 
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "65518": {
44
  "content": "<|query_end|>",
45
  "lstrip": false,
46
  "normalized": false,
 
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "65519": {
52
+ "content": "<|source_start|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "65520": {
60
  "content": "<|source_id|>",
61
  "lstrip": false,
62
  "normalized": false,
 
64
  "single_word": false,
65
  "special": true
66
  },
67
+ "65521": {
68
+ "content": "<|source_end|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "65522": {
76
+ "content": "<|language_start|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "65523": {
84
+ "content": "<|language_end|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
  "65524": {
92
  "content": "<|query_analysis_start|>",
93
  "lstrip": false,
 
137
  "special": true
138
  },
139
  "65530": {
140
+ "content": "<|source_report_start|>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
 
145
  "special": true
146
  },
147
  "65531": {
148
+ "content": "<|source_report_end|>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
 
153
  "special": true
154
  },
155
  "65532": {
156
+ "content": "<|draft_start|>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
 
161
  "special": true
162
  },
163
  "65533": {
164
+ "content": "<|draft_end|>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
 
169
  "special": true
170
  },
171
  "65534": {
172
+ "content": "<|answer_start|>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
 
177
  "special": true
178
  },
179
  "65535": {
180
+ "content": "<|answer_end|>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
 
188
  "additional_special_tokens": [
189
  "<|query_start|>",
190
  "<|query_end|>",
191
+ "<|source_start|>",
192
  "<|source_id|>",
193
+ "<|source_end|>",
194
+ "<|language_start|>",
195
+ "<|language_end|>",
196
  "<|query_analysis_start|>",
197
  "<|query_analysis_end|>",
198
  "<|query_report_start|>",
199
  "<|query_report_end|>",
200
  "<|source_analysis_start|>",
201
  "<|source_analysis_end|>",
202
+ "<|source_report_start|>",
203
+ "<|source_report_end|>",
204
  "<|draft_start|>",
205
  "<|draft_end|>",
206
  "<|answer_start|>",
207
+ "<|answer_end|>"
 
 
208
  ],
209
  "clean_up_tokenization_spaces": true,
210
  "model_max_length": 1000000000000000019884624838656,