"""Wrap two pretrained embedding models and their tokenizers, then save them.

Loads ``jinaai/jina-embeddings-v3`` and ``Snowflake/snowflake-arctic-embed-s``
together with their tokenizers, wraps each pair of lists in the project's
``ConcatModel`` / ``ConcatTokenizer`` classes, and writes both wrappers to a
single output directory with ``save_pretrained``.

Running this script loads both checkpoints by name through ``from_pretrained``
and writes files to ``output_path``.
"""
from transformers import AutoModel, BertTokenizer, XLMRobertaTokenizerFast

from temp.modeling_jina_v3_arctic_s import ConcatModel, ConcatTokenizer


# trust_remote_code=True lets transformers execute the modeling code shipped
# in the referenced repositories, so only use it with sources you trust.
models = [
    AutoModel.from_pretrained("jinaai/jina-embeddings-v3", trust_remote_code=True),
    AutoModel.from_pretrained("Snowflake/snowflake-arctic-embed-s", trust_remote_code=True),
]
# NOTE(review): presumably ConcatModel combines the outputs of the wrapped
# models -- confirm in temp.modeling_jina_v3_arctic_s.
model = ConcatModel(models)

# Tokenizers are listed in the same order as `models` (jina first, then
# snowflake). NOTE(review): ConcatTokenizer likely relies on this pairing.
tokenizers = [
    XLMRobertaTokenizerFast.from_pretrained("jinaai/jina-embeddings-v3"),
    BertTokenizer.from_pretrained("Snowflake/snowflake-arctic-embed-s"),
]
tokenizer = ConcatTokenizer(tokenizers)

# Relative path, resolved against the current working directory.
# NOTE(review): this is the same name as the `temp` package imported above, so
# when run from the project root the artifacts land inside that package.
output_path = 'temp'
model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)

print(f'Model saved as {output_path}')
|