Files changed (2)
  1. README.md +7 -7
  2. config.json +1 -5
README.md CHANGED
@@ -2609,7 +2609,7 @@ language:
2609
 
2610
  # nomic-embed-text-v1.5: Resizable Production Embeddings with Matryoshka Representation Learning
2611
 
2612
- [Blog](https://www.nomic.ai/blog/posts/nomic-embed-text-v1) | [Technical Report](https://arxiv.org/abs/2402.01613) | [AWS SageMaker](https://aws.amazon.com/marketplace/seller-profile?id=seller-tpqidcj54zawi) | [Nomic Platform](https://atlas.nomic.ai)
2613
 
2614
  **Exciting Update!**: `nomic-embed-text-v1.5` is now multimodal! [nomic-embed-vision-v1.5](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) is aligned to the embedding space of `nomic-embed-text-v1.5`, meaning any text embedding is multimodal!
2615
 
@@ -2630,7 +2630,7 @@ This prefix is used for embedding texts as documents, for example as documents f
2630
  ```python
2631
  from sentence_transformers import SentenceTransformer
2632
 
2633
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2634
  sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
2635
  embeddings = model.encode(sentences)
2636
  print(embeddings)
@@ -2645,7 +2645,7 @@ This prefix is used for embedding texts as questions that documents from a datas
2645
  ```python
2646
  from sentence_transformers import SentenceTransformer
2647
 
2648
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2649
  sentences = ['search_query: Who is Laurens van Der Maaten?']
2650
  embeddings = model.encode(sentences)
2651
  print(embeddings)
@@ -2660,7 +2660,7 @@ This prefix is used for embedding texts in order to group them into clusters, di
2660
  ```python
2661
  from sentence_transformers import SentenceTransformer
2662
 
2663
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2664
  sentences = ['clustering: the quick brown fox']
2665
  embeddings = model.encode(sentences)
2666
  print(embeddings)
@@ -2675,7 +2675,7 @@ This prefix is used for embedding texts into vectors that will be used as featur
2675
  ```python
2676
  from sentence_transformers import SentenceTransformer
2677
 
2678
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2679
  sentences = ['classification: the quick brown fox']
2680
  embeddings = model.encode(sentences)
2681
  print(embeddings)
@@ -2737,8 +2737,8 @@ The model natively supports scaling of the sequence length past 2048 tokens. To
2737
  + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
2738
 
2739
 
2740
- - model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
2741
- + model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True, rotary_scaling_factor=2)
2742
  ```
2743
 
2744
  ### Transformers.js
 
2609
 
2610
  # nomic-embed-text-v1.5: Resizable Production Embeddings with Matryoshka Representation Learning
2611
 
2612
+ [Blog](https://www.nomic.ai/blog/posts/nomic-embed-text-v1) | [Technical Report](https://arxiv.org/abs/2402.01613) | [AWS SageMaker](https://aws.amazon.com/marketplace/seller-profile?id=seller-tpqidcj54zawi) | [Atlas Embedding and Unstructured Data Analytics Platform](https://atlas.nomic.ai)
2613
 
2614
  **Exciting Update!**: `nomic-embed-text-v1.5` is now multimodal! [nomic-embed-vision-v1.5](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) is aligned to the embedding space of `nomic-embed-text-v1.5`, meaning any text embedding is multimodal!
2615
 
 
2630
  ```python
2631
  from sentence_transformers import SentenceTransformer
2632
 
2633
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
2634
  sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
2635
  embeddings = model.encode(sentences)
2636
  print(embeddings)
 
2645
  ```python
2646
  from sentence_transformers import SentenceTransformer
2647
 
2648
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
2649
  sentences = ['search_query: Who is Laurens van Der Maaten?']
2650
  embeddings = model.encode(sentences)
2651
  print(embeddings)
 
2660
  ```python
2661
  from sentence_transformers import SentenceTransformer
2662
 
2663
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
2664
  sentences = ['clustering: the quick brown fox']
2665
  embeddings = model.encode(sentences)
2666
  print(embeddings)
 
2675
  ```python
2676
  from sentence_transformers import SentenceTransformer
2677
 
2678
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)
2679
  sentences = ['classification: the quick brown fox']
2680
  embeddings = model.encode(sentences)
2681
  print(embeddings)
 
2737
  + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
2738
 
2739
 
2740
+ - model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)
2741
+ + model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1', trust_remote_code=True, rotary_scaling_factor=2)
2742
  ```
2743
 
2744
  ### Transformers.js
config.json CHANGED
@@ -7,11 +7,7 @@
7
  "auto_map": {
8
  "AutoConfig": "nomic-ai/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
9
  "AutoModel": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
10
- "AutoModelForMaskedLM": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining",
11
- "AutoModelForSequenceClassification": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForSequenceClassification",
12
- "AutoModelForMultipleChoice": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForMultipleChoice",
13
- "AutoModelForQuestionAnswering": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForQuestionAnswering",
14
- "AutoModelForTokenClassification": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForTokenClassification"
15
  },
16
  "bos_token_id": null,
17
  "causal": false,
 
7
  "auto_map": {
8
  "AutoConfig": "nomic-ai/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
9
  "AutoModel": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
10
+ "AutoModelForMaskedLM": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining"
 
 
 
 
11
  },
12
  "bos_token_id": null,
13
  "causal": false,