"""Export a Sentence Transformers model to an accelerated backend and push it to the Hub.

Supported backends are ONNX and OpenVINO, each optionally with int8
quantization. Every upload is opened as a pull request against
``MODEL_NAME`` rather than committed directly.
"""
from sentence_transformers import (
    SentenceTransformer,
    export_static_quantized_openvino_model,
    export_dynamic_quantized_onnx_model,
)

# Hub repository that is both the source of the model and the upload target.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v1_0_7_8"


def export_model(backend: str = "onnx", use_qint8: bool = False) -> None:
    """Load ``MODEL_NAME`` with the given backend and push the export to the Hub.

    Args:
        backend: Either ``"onnx"`` or ``"openvino"``.
        use_qint8: When true, export an int8-quantized variant (dynamic
            quantization for ONNX, static quantization for OpenVINO)
            instead of the plain backend export.

    Raises:
        ValueError: If ``backend`` is not one of the supported values.
    """
    # Reject unknown backends before any model is downloaded or loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        # Plain (non-quantized) export: upload the backend-converted model.
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            # None defers to the library's default quantization settings.
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        export_dynamic_quantized_onnx_model(
            model,
            "avx512_vnni",  # quantization preset name passed to the library
            MODEL_NAME,
            push_to_hub=True,
            # Open a PR like every other export path instead of committing
            # the quantized weights straight to the main branch.
            create_pr=True,
        )


# Select the single backend/quantization combination to export on this run.
# NOTE: the export below executes at module level, i.e. whenever this file
# is run or imported.
BACKEND = "openvino"
USE_QINT8 = False

print(f"Exporting {BACKEND} model with QINT8={USE_QINT8}")
export_model(backend=BACKEND, use_qint8=USE_QINT8)

# Debug aid (disabled): check the installed TensorRT version.
# import tensorrt as trt
# print(trt.__version__)