Commit
·
d33db5e
1
Parent(s):
82db705
update model
Browse files- sherpa-onnx-asr.js +82 -6
- sherpa-onnx-wasm-main-vad-asr.js +0 -0
- sherpa-onnx-wasm-main-vad-asr.wasm +2 -2
sherpa-onnx-asr.js
CHANGED
@@ -31,6 +31,10 @@ function freeConfig(config, Module) {
|
|
31 |
freeConfig(config.nemoCtc, Module)
|
32 |
}
|
33 |
|
|
|
|
|
|
|
|
|
34 |
if ('whisper' in config) {
|
35 |
freeConfig(config.whisper, Module)
|
36 |
}
|
@@ -47,6 +51,10 @@ function freeConfig(config, Module) {
|
|
47 |
freeConfig(config.zipformerCtc, Module)
|
48 |
}
|
49 |
|
|
|
|
|
|
|
|
|
50 |
if ('moonshine' in config) {
|
51 |
freeConfig(config.moonshine, Module)
|
52 |
}
|
@@ -173,6 +181,22 @@ function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) {
|
|
173 |
}
|
174 |
}
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
function initSherpaOnnxOnlineModelConfig(config, Module) {
|
177 |
if (!('transducer' in config)) {
|
178 |
config.transducer = {
|
@@ -201,6 +225,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
|
|
201 |
};
|
202 |
}
|
203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
if (!('tokensBuf' in config)) {
|
205 |
config.tokensBuf = '';
|
206 |
}
|
@@ -221,8 +251,11 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
|
|
221 |
const nemoCtc =
|
222 |
initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module);
|
223 |
|
224 |
-
const
|
225 |
-
|
|
|
|
|
|
|
226 |
|
227 |
const ptr = Module._malloc(len);
|
228 |
|
@@ -308,9 +341,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
|
|
308 |
Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
|
309 |
offset += nemoCtc.len;
|
310 |
|
|
|
|
|
|
|
311 |
return {
|
312 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
313 |
-
paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc
|
|
|
314 |
}
|
315 |
}
|
316 |
|
@@ -519,6 +556,10 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
519 |
model: '',
|
520 |
};
|
521 |
|
|
|
|
|
|
|
|
|
522 |
let type = 0;
|
523 |
|
524 |
switch (type) {
|
@@ -541,6 +582,10 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
541 |
// nemoCtc
|
542 |
onlineNemoCtcModelConfig.model = './nemo-ctc.onnx';
|
543 |
break;
|
|
|
|
|
|
|
|
|
544 |
}
|
545 |
|
546 |
|
@@ -549,6 +594,7 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
549 |
paraformer: onlineParaformerModelConfig,
|
550 |
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
551 |
nemoCtc: onlineNemoCtcModelConfig,
|
|
|
552 |
tokens: './tokens.txt',
|
553 |
numThreads: 1,
|
554 |
provider: 'cpu',
|
@@ -559,8 +605,8 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
559 |
};
|
560 |
|
561 |
const featureConfig = {
|
562 |
-
sampleRate: 16000,
|
563 |
-
featureDim: 80,
|
564 |
};
|
565 |
|
566 |
let recognizerConfig = {
|
@@ -691,6 +737,23 @@ function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
|
|
691 |
}
|
692 |
}
|
693 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
694 |
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
695 |
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
696 |
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
|
@@ -955,6 +1018,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|
955 |
};
|
956 |
}
|
957 |
|
|
|
|
|
|
|
|
|
|
|
|
|
958 |
if (!('whisper' in config)) {
|
959 |
config.whisper = {
|
960 |
encoder: '',
|
@@ -1036,9 +1105,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|
1036 |
|
1037 |
const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);
|
1038 |
|
|
|
|
|
|
|
1039 |
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
1040 |
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
|
1041 |
-
dolphin.len + zipformerCtc.len + canary.len;
|
1042 |
|
1043 |
const ptr = Module._malloc(len);
|
1044 |
|
@@ -1146,11 +1218,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|
1146 |
Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
|
1147 |
offset += canary.len;
|
1148 |
|
|
|
|
|
|
|
1149 |
return {
|
1150 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
1151 |
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
1152 |
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
|
1153 |
dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
|
|
|
1154 |
}
|
1155 |
}
|
1156 |
|
|
|
31 |
freeConfig(config.nemoCtc, Module)
|
32 |
}
|
33 |
|
34 |
+
if ('toneCtc' in config) {
|
35 |
+
freeConfig(config.toneCtc, Module)
|
36 |
+
}
|
37 |
+
|
38 |
if ('whisper' in config) {
|
39 |
freeConfig(config.whisper, Module)
|
40 |
}
|
|
|
51 |
freeConfig(config.zipformerCtc, Module)
|
52 |
}
|
53 |
|
54 |
+
if ('wenetCtc' in config) {
|
55 |
+
freeConfig(config.wenetCtc, Module)
|
56 |
+
}
|
57 |
+
|
58 |
if ('moonshine' in config) {
|
59 |
freeConfig(config.moonshine, Module)
|
60 |
}
|
|
|
181 |
}
|
182 |
}
|
183 |
|
184 |
+
/**
 * Writes an online toneCtc model config into memory obtained from
 * `Module._malloc`.
 *
 * Two allocations are made: one holding the NUL-terminated UTF-8 bytes of
 * the model path, and one 4-byte struct whose single slot stores the address
 * of that string.
 *
 * @param {{model?: string}|null|undefined} config - toneCtc settings. A
 *     missing/nullish config or a missing `model` is treated as an empty
 *     path.
 * @param {Object} Module - object providing `lengthBytesUTF8`, `_malloc`,
 *     `stringToUTF8` and `setValue`.
 * @returns {{buffer: number, ptr: number, len: number}} `buffer` is the
 *     address of the string bytes, `ptr` the address of the struct and `len`
 *     the struct size in bytes.
 */
function initSherpaOnnxOnlineToneCtcModelConfig(config, Module) {
  // A caller guard of the form `'toneCtc' in config` is also satisfied by a
  // key that holds null/undefined, so do not dereference config blindly.
  const model = (config && config.model) || '';

  // +1 leaves room for the trailing NUL written by stringToUTF8.
  const n = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, n);

  // The struct's only field is the pointer to the model path.
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}
|
199 |
+
|
200 |
function initSherpaOnnxOnlineModelConfig(config, Module) {
|
201 |
if (!('transducer' in config)) {
|
202 |
config.transducer = {
|
|
|
225 |
};
|
226 |
}
|
227 |
|
228 |
+
if (!('toneCtc' in config)) {
|
229 |
+
config.toneCtc = {
|
230 |
+
model: '',
|
231 |
+
};
|
232 |
+
}
|
233 |
+
|
234 |
if (!('tokensBuf' in config)) {
|
235 |
config.tokensBuf = '';
|
236 |
}
|
|
|
251 |
const nemoCtc =
|
252 |
initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module);
|
253 |
|
254 |
+
const toneCtc =
|
255 |
+
initSherpaOnnxOnlineToneCtcModelConfig(config.toneCtc, Module);
|
256 |
+
|
257 |
+
const len = transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 +
|
258 |
+
nemoCtc.len + toneCtc.len;
|
259 |
|
260 |
const ptr = Module._malloc(len);
|
261 |
|
|
|
341 |
Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
|
342 |
offset += nemoCtc.len;
|
343 |
|
344 |
+
Module._CopyHeap(toneCtc.ptr, toneCtc.len, ptr + offset);
|
345 |
+
offset += toneCtc.len;
|
346 |
+
|
347 |
return {
|
348 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
349 |
+
paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc,
|
350 |
+
toneCtc: toneCtc,
|
351 |
}
|
352 |
}
|
353 |
|
|
|
556 |
model: '',
|
557 |
};
|
558 |
|
559 |
+
const onlineToneCtcModelConfig = {
|
560 |
+
model: '',
|
561 |
+
};
|
562 |
+
|
563 |
let type = 0;
|
564 |
|
565 |
switch (type) {
|
|
|
582 |
// nemoCtc
|
583 |
onlineNemoCtcModelConfig.model = './nemo-ctc.onnx';
|
584 |
break;
|
585 |
+
case 4:
|
586 |
+
// toneCtc
|
587 |
+
onlineToneCtcModelConfig.model = './tone-ctc.onnx';
|
588 |
+
break;
|
589 |
}
|
590 |
|
591 |
|
|
|
594 |
paraformer: onlineParaformerModelConfig,
|
595 |
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
596 |
nemoCtc: onlineNemoCtcModelConfig,
|
597 |
+
toneCtc: onlineToneCtcModelConfig,
|
598 |
tokens: './tokens.txt',
|
599 |
numThreads: 1,
|
600 |
provider: 'cpu',
|
|
|
605 |
};
|
606 |
|
607 |
const featureConfig = {
|
608 |
+
sampleRate: 16000, // it is ignored when toneCtc is used
|
609 |
+
featureDim: 80, // it is ignored when toneCtc is used
|
610 |
};
|
611 |
|
612 |
let recognizerConfig = {
|
|
|
737 |
}
|
738 |
}
|
739 |
|
740 |
+
/**
 * Writes an offline wenetCtc model config into memory obtained from
 * `Module._malloc`.
 *
 * Two allocations are made: one holding the NUL-terminated UTF-8 bytes of
 * the model path, and one 4-byte struct whose single slot stores the address
 * of that string.
 *
 * @param {{model?: string}|null|undefined} config - wenetCtc settings. A
 *     missing/nullish config or a missing `model` is treated as an empty
 *     path.
 * @param {Object} Module - object providing `lengthBytesUTF8`, `_malloc`,
 *     `stringToUTF8` and `setValue`.
 * @returns {{buffer: number, ptr: number, len: number}} `buffer` is the
 *     address of the string bytes, `ptr` the address of the struct and `len`
 *     the struct size in bytes.
 */
function initSherpaOnnxOfflineWenetCtcModelConfig(config, Module) {
  // A caller guard of the form `'wenetCtc' in config` is also satisfied by a
  // key that holds null/undefined, so do not dereference config blindly.
  const model = (config && config.model) || '';

  // +1 leaves room for the trailing NUL written by stringToUTF8.
  const n = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, n);

  // The struct's only field is the pointer to the model path.
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}
|
756 |
+
|
757 |
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
758 |
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
759 |
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
|
|
|
1018 |
};
|
1019 |
}
|
1020 |
|
1021 |
+
if (!('wenetCtc' in config)) {
|
1022 |
+
config.wenetCtc = {
|
1023 |
+
model: '',
|
1024 |
+
};
|
1025 |
+
}
|
1026 |
+
|
1027 |
if (!('whisper' in config)) {
|
1028 |
config.whisper = {
|
1029 |
encoder: '',
|
|
|
1105 |
|
1106 |
const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);
|
1107 |
|
1108 |
+
const wenetCtc =
|
1109 |
+
initSherpaOnnxOfflineWenetCtcModelConfig(config.wenetCtc, Module);
|
1110 |
+
|
1111 |
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
1112 |
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
|
1113 |
+
dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len;
|
1114 |
|
1115 |
const ptr = Module._malloc(len);
|
1116 |
|
|
|
1218 |
Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
|
1219 |
offset += canary.len;
|
1220 |
|
1221 |
+
Module._CopyHeap(wenetCtc.ptr, wenetCtc.len, ptr + offset);
|
1222 |
+
offset += wenetCtc.len;
|
1223 |
+
|
1224 |
return {
|
1225 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
1226 |
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
1227 |
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
|
1228 |
dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
|
1229 |
+
wenetCtc: wenetCtc,
|
1230 |
}
|
1231 |
}
|
1232 |
|
sherpa-onnx-wasm-main-vad-asr.js
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
sherpa-onnx-wasm-main-vad-asr.wasm
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f530fc3dfacc8109b902cbb9b77ab37f1b355a0e6b9273d16d7e295caaae5186
|
3 |
+
size 11564038
|