Upload processor
Browse files
- chat_template.json +3 -0
- preprocessor_config.json +15 -0
- processor_config.json +6 -0
- tokenizer_config.json +1 -0
chat_template.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"chat_template": "{% set audio_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' or content['type'] == 'audio' %}{% set audio_count.value = audio_count.value + 1 %}Audio {{ audio_count.value }}: <|audio_bos|><|AUDIO|><|audio_eos|>\n{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
|
3 |
+
}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"chunk_length": 30,
|
3 |
+
"dither": 0.0,
|
4 |
+
"feature_extractor_type": "WhisperFeatureExtractor",
|
5 |
+
"feature_size": 128,
|
6 |
+
"hop_length": 160,
|
7 |
+
"n_fft": 400,
|
8 |
+
"n_samples": 480000,
|
9 |
+
"nb_max_frames": 3000,
|
10 |
+
"padding_side": "right",
|
11 |
+
"padding_value": 0.0,
|
12 |
+
"processor_class": "Qwen2AudioProcessor",
|
13 |
+
"return_attention_mask": true,
|
14 |
+
"sampling_rate": 16000
|
15 |
+
}
|
processor_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"audio_bos_token": "<|audio_bos|>",
|
3 |
+
"audio_eos_token": "<|audio_eos|>",
|
4 |
+
"audio_token": "<|AUDIO|>",
|
5 |
+
"processor_class": "Qwen2AudioProcessor"
|
6 |
+
}
|
tokenizer_config.json
CHANGED
@@ -29604,6 +29604,7 @@
|
|
29604 |
"model_max_length": 8192,
|
29605 |
"pad_token": "<|endoftext|>",
|
29606 |
"padding_side": "left",
|
|
|
29607 |
"split_special_tokens": false,
|
29608 |
"tokenizer_class": "Qwen2Tokenizer",
|
29609 |
"unk_token": null
|
|
|
29604 |
"model_max_length": 8192,
|
29605 |
"pad_token": "<|endoftext|>",
|
29606 |
"padding_side": "left",
|
29607 |
+
"processor_class": "Qwen2AudioProcessor",
|
29608 |
"split_special_tokens": false,
|
29609 |
"tokenizer_class": "Qwen2Tokenizer",
|
29610 |
"unk_token": null
|