Update README.md
Browse files
README.md
CHANGED
@@ -30,8 +30,21 @@ import mlx.core as mx
|
|
30 |
|
31 |
model, tokenizer = load("mlx-community/embeddinggemma-300m-qat-q8_0-unquantized-bf16")
|
32 |
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
embeddings = output.text_embeds # Normalized embeddings
|
36 |
|
37 |
# Compute dot product between normalized embeddings
|
@@ -41,4 +54,22 @@ print("Similarity matrix between texts:")
|
|
41 |
print(similarity_matrix)
|
42 |
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
```
|
|
|
30 |
|
31 |
model, tokenizer = load("mlx-community/embeddinggemma-300m-qat-q8_0-unquantized-bf16")
|
32 |
|
33 |
+
|
34 |
+
# For text embedding
|
35 |
+
sentences = [
|
36 |
+
"task: sentence similarity | query: Nothing really matters.",
|
37 |
+
"task: sentence similarity | query: The dog is barking.",
|
38 |
+
"task: sentence similarity | query: The dog is barking.",
|
39 |
+
]
|
40 |
+
|
41 |
+
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='mlx')
|
42 |
+
|
43 |
+
# Run the model on the tokenized batch
|
44 |
+
input_ids = encoded_input['input_ids']
|
45 |
+
attention_mask = encoded_input['attention_mask']
|
46 |
+
output = model(input_ids, attention_mask)
|
47 |
+
|
48 |
embeddings = output.text_embeds # Normalized embeddings
|
49 |
|
50 |
# Compute dot product between normalized embeddings
|
|
|
54 |
print(similarity_matrix)
|
55 |
|
56 |
|
57 |
+
# Task-specific prompt prefixes: prepend the one matching your task to each input text
|
58 |
+
task_prefixes = {
|
59 |
+
"BitextMining": "task: search result | query: ",
|
60 |
+
"Clustering": "task: clustering | query: ",
|
61 |
+
"Classification": "task: classification | query: ",
|
62 |
+
"MultilabelClassification": "task: classification | query: ",
|
63 |
+
"PairClassification": "task: sentence similarity | query: ",
|
64 |
+
"InstructionRetrieval": "task: code retrieval | query: ",
|
65 |
+
"Reranking": "task: search result | query: ",
|
66 |
+
"Retrieval": "task: search result | query: ",
|
67 |
+
"Retrieval-query": "task: search result | query: ",
|
68 |
+
"Retrieval-document": "title: none | text: ",
|
69 |
+
"STS": "task: sentence similarity | query: ",
|
70 |
+
"Summarization": "task: summarization | query: ",
|
71 |
+
"document": "title: none | text: "
|
72 |
+
}
|
73 |
+
|
74 |
+
|
75 |
```
|