ONNX Inference in NodeJS
#2 · by loretoparisi · opened
I have successfully converted the sentencepiece.bpe.model to a JSON vocabulary and a text merges file. See here for more details about the SentencePiece BPE tokenizer.
Tokenization works fine using the tokenizers library:
const { promisify } = require("util");
const { SentencePieceBPETokenizer } = require("tokenizers");

const tokenizer = await SentencePieceBPETokenizer.fromOptions({
  vocabFile: "sentencepiece-vocab.json",
  mergesFile: "sentencepiece-merges.txt",
});

// wrap the callback-based encode so it can be awaited
const encoder = (tokenizer) => promisify(tokenizer.encode.bind(tokenizer));
const encode = encoder(tokenizer);
const encoded = await encode("Hello how are you?");
console.log("ids ", encoded.getIds());
console.log("tokens ", encoded.getTokens());

const skipSpecialTokens = true;
const decoded = await tokenizer.decode(encoded.getIds(), skipSpecialTokens);
console.log(decoded); // Hello how are you?
and I get the expected result:
ids [ 35377, 3641, 620, 397, 31 ]
tokens [ '▁Hello', '▁how', '▁are', '▁you', '?' ]
Hello how are you?
However, when I pass the generated ids to the ONNX model for inference, I get an error:
const ort = require('onnxruntime-node');

// ONNX session
const session = await ort.InferenceSession.create(self._options.model.path, options);

const encoded_ids = (await tokenizer.tokenize(text)).getIds();
const model_input = ONNX.create_model_input(encoded_ids);
const output = await session.run(model_input, ['output_0']);
which fails with invalid expand shape errors:
2022-09-29 18:59:08.039 node[63957:683481] 2022-09-29 17:59:08.039329 [E:onnxruntime:, parallel_executor.cc:210 RunNodeAsync] Non-zero status code returned while running Expand node. Name:'Expand_21' Status Message: invalid expand shape
2022-09-29 18:59:08.040 node[63957:683429] 2022-09-29 17:59:08.039978 [E:onnxruntime:, parallel_executor.cc:75 Execute] [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Non-zero status code returned while running Expand node. Name:'Expand_21' Status Message: invalid expand shape
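For reference, onnxruntime-node exposes the input and output names the session expects, so they can be printed and compared against the keys of the feed built by create_model_input (a minimal check, assuming the session object from the snippet above):

// Sanity check (not in the original snippet): list what the session expects
console.log(session.inputNames);   // names the model graph expects, e.g. input_ids, attention_mask, ...
console.log(session.outputNames);  // should include the 'output_0' used in session.run above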
The model input was:
model_input {
input_ids: l {
dims: [ 1, 604 ],
type: 'int64',
data: BigInt64Array(604) [
101n, 5n, 0n, 567n, 807n, 4504n, 91n, 397n,
6164n, 75n, 1379n, 644n, 0n, 69084n, 1365n, 86n,
49781n, 441n, 5674n, 515n, 58967n, 69n, 4223n, 1080n,
34270n, 0n, 283n, 106n, 397n, 3567n, 3677n, 31n,
15900n, 397n, 220n, 1296n, 31n, 0n, 131n, 617n,
4504n, 91n, 1671n, 441n, 15n, 0n, 8330n, 89n,
18n, 24n, 17n, 26865n, 0n, 37838n, 2173n, 441n,
30481n, 397n, 12318n, 4126n, 7067n, 86n, 5153n, 53n,
441n, 0n, 567n, 2300n, 24n, 17n, 3713n, 2366n,
397n, 24n, 106n, 89288n, 99n, 0n, 918n, 11632n,
758n, 79196n, 256n, 3407n, 9587n, 213n, 0n, 23388n,
24n, 6n, 5791n, 5044n, 80096n, 0n, 567n, 24n,
38n, 705n, 190n, 75n,
... 504 more items
],
size: 604
},
attention_mask: l {
dims: [ 1, 604 ],
type: 'int64',
data: BigInt64Array(604) [
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n, 1n,
1n, 1n, 1n, 1n,
... 504 more items
],
size: 604
},
token_type_ids: l {
dims: [ 1, 604 ],
type: 'int64',
data: BigInt64Array(604) [
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n,
0n, 0n, 0n, 0n,
... 504 more items
],
size: 604
}
}
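For context, create_model_input builds the three int64 tensors shown above. A minimal sketch of such a helper (simplified here; the real implementation may differ) just wraps the token ids into onnxruntime-node Tensors of shape [1, seq_len]:

// Simplified sketch of a helper that builds the feed shown above:
// three int64 tensors of shape [1, seq_len] from the token ids.
function create_model_input(encoded_ids) {
  const dims = [1, encoded_ids.length];
  const ids = BigInt64Array.from(encoded_ids, (id) => BigInt(id));
  const mask = new BigInt64Array(encoded_ids.length).fill(1n);   // attend to every token
  const type_ids = new BigInt64Array(encoded_ids.length);        // all zeros, single segment
  return {
    input_ids: new ort.Tensor('int64', ids, dims),
    attention_mask: new ort.Tensor('int64', mask, dims),
    token_type_ids: new ort.Tensor('int64', type_ids, dims),
  };
}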