nomic-embed-text-v1.5
#47
by
jlockhart
- opened
- README.md +7 -7
- config.json +1 -5
README.md
CHANGED
@@ -2609,7 +2609,7 @@ language:
|
|
2609 |
|
2610 |
# nomic-embed-text-v1.5: Resizable Production Embeddings with Matryoshka Representation Learning
|
2611 |
|
2612 |
-
[Blog](https://www.nomic.ai/blog/posts/nomic-embed-text-v1) | [Technical Report](https://arxiv.org/abs/2402.01613) | [AWS SageMaker](https://aws.amazon.com/marketplace/seller-profile?id=seller-tpqidcj54zawi) | [Atlas Embedding and Unstructured Data Analytics Platform](https://atlas.nomic.ai)
|
2613 |
|
2614 |
**Exciting Update!**: `nomic-embed-text-v1.5` is now multimodal! [nomic-embed-vision-v1.5](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) is aligned to the embedding space of `nomic-embed-text-v1.5`, meaning any text embedding is multimodal!
|
2615 |
|
@@ -2630,7 +2630,7 @@ This prefix is used for embedding texts as documents, for example as documents f
|
|
2630 |
```python
|
2631 |
from sentence_transformers import SentenceTransformer
|
2632 |
|
2633 |
-
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2634 |
sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
|
2635 |
embeddings = model.encode(sentences)
|
2636 |
print(embeddings)
|
@@ -2645,7 +2645,7 @@ This prefix is used for embedding texts as questions that documents from a datas
|
|
2645 |
```python
|
2646 |
from sentence_transformers import SentenceTransformer
|
2647 |
|
2648 |
-
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2649 |
sentences = ['search_query: Who is Laurens van Der Maaten?']
|
2650 |
embeddings = model.encode(sentences)
|
2651 |
print(embeddings)
|
@@ -2660,7 +2660,7 @@ This prefix is used for embedding texts in order to group them into clusters, di
|
|
2660 |
```python
|
2661 |
from sentence_transformers import SentenceTransformer
|
2662 |
|
2663 |
-
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2664 |
sentences = ['clustering: the quick brown fox']
|
2665 |
embeddings = model.encode(sentences)
|
2666 |
print(embeddings)
|
@@ -2675,7 +2675,7 @@ This prefix is used for embedding texts into vectors that will be used as featur
|
|
2675 |
```python
|
2676 |
from sentence_transformers import SentenceTransformer
|
2677 |
|
2678 |
-
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2679 |
sentences = ['classification: the quick brown fox']
|
2680 |
embeddings = model.encode(sentences)
|
2681 |
print(embeddings)
|
@@ -2737,8 +2737,8 @@ The model natively supports scaling of the sequence length past 2048 tokens. To
|
|
2737 |
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
|
2738 |
|
2739 |
|
2740 |
-
- model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)
|
2741 |
-
+ model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1
|
2742 |
```
|
2743 |
|
2744 |
### Transformers.js
|
|
|
2609 |
|
2610 |
# nomic-embed-text-v1.5: Resizable Production Embeddings with Matryoshka Representation Learning
|
2611 |
|
2612 |
+
[Blog](https://www.nomic.ai/blog/posts/nomic-embed-text-v1) | [Technical Report](https://arxiv.org/abs/2402.01613) | [AWS SageMaker](https://aws.amazon.com/marketplace/seller-profile?id=seller-tpqidcj54zawi) | [Atlas Embedding and Unstructured Data Analytics Platform](https://atlas.nomic.ai)
|
2613 |
|
2614 |
**Exciting Update!**: `nomic-embed-text-v1.5` is now multimodal! [nomic-embed-vision-v1.5](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) is aligned to the embedding space of `nomic-embed-text-v1.5`, meaning any text embedding is multimodal!
|
2615 |
|
|
|
2630 |
```python
|
2631 |
from sentence_transformers import SentenceTransformer
|
2632 |
|
2633 |
+
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2634 |
sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
|
2635 |
embeddings = model.encode(sentences)
|
2636 |
print(embeddings)
|
|
|
2645 |
```python
|
2646 |
from sentence_transformers import SentenceTransformer
|
2647 |
|
2648 |
+
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2649 |
sentences = ['search_query: Who is Laurens van Der Maaten?']
|
2650 |
embeddings = model.encode(sentences)
|
2651 |
print(embeddings)
|
|
|
2660 |
```python
|
2661 |
from sentence_transformers import SentenceTransformer
|
2662 |
|
2663 |
+
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2664 |
sentences = ['clustering: the quick brown fox']
|
2665 |
embeddings = model.encode(sentences)
|
2666 |
print(embeddings)
|
|
|
2675 |
```python
|
2676 |
from sentence_transformers import SentenceTransformer
|
2677 |
|
2678 |
+
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
|
2679 |
sentences = ['classification: the quick brown fox']
|
2680 |
embeddings = model.encode(sentences)
|
2681 |
print(embeddings)
|
|
|
2737 |
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
|
2738 |
|
2739 |
|
2740 |
+
- model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)
|
2741 |
+
+ model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1', trust_remote_code=True, rotary_scaling_factor=2)
|
2742 |
```
|
2743 |
|
2744 |
### Transformers.js
|
config.json
CHANGED
@@ -7,11 +7,7 @@
|
|
7 |
"auto_map": {
|
8 |
"AutoConfig": "nomic-ai/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
|
9 |
"AutoModel": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
|
10 |
-
"AutoModelForMaskedLM": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining"
|
11 |
-
"AutoModelForSequenceClassification": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForSequenceClassification",
|
12 |
-
"AutoModelForMultipleChoice": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForMultipleChoice",
|
13 |
-
"AutoModelForQuestionAnswering": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForQuestionAnswering",
|
14 |
-
"AutoModelForTokenClassification": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForTokenClassification"
|
15 |
},
|
16 |
"bos_token_id": null,
|
17 |
"causal": false,
|
|
|
7 |
"auto_map": {
|
8 |
"AutoConfig": "nomic-ai/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
|
9 |
"AutoModel": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
|
10 |
+
"AutoModelForMaskedLM": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining"
|
|
|
|
|
|
|
|
|
11 |
},
|
12 |
"bos_token_id": null,
|
13 |
"causal": false,
|