[WIP] Transformers.js improvements
Browse files
- default to q4 since the model is quite large
- support external data format (see [transformers.js PR #1212](https://github.com/huggingface/transformers.js/pull/1212))
- config.json +5 -0
config.json
CHANGED
@@ -25,9 +25,14 @@
|
|
25 |
"torch_dtype": "bfloat16",
|
26 |
"transformers_version": "4.42.3",
|
27 |
"transformers.js_config": {
|
|
|
28 |
"kv_cache_dtype": {
|
29 |
"q4f16": "float16",
|
30 |
"fp16": "float16"
|
|
|
|
|
|
|
|
|
31 |
}
|
32 |
},
|
33 |
"use_cache": true,
|
|
|
25 |
"torch_dtype": "bfloat16",
|
26 |
"transformers_version": "4.42.3",
|
27 |
"transformers.js_config": {
|
28 |
+
"dtype": "q4",
|
29 |
"kv_cache_dtype": {
|
30 |
"q4f16": "float16",
|
31 |
"fp16": "float16"
|
32 |
+
},
|
33 |
+
"use_external_data_format": {
|
34 |
+
"model.onnx": true,
|
35 |
+
"model_fp16.onnx": true
|
36 |
}
|
37 |
},
|
38 |
"use_cache": true,
|