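# NOTE: the lines below appear to be file-hosting page residue captured with
# this script; they are kept as comments so the module remains valid Python.
# tasal9's picture
# Add simple_demo.py
# 7995949 verified
# raw
# history blame
# 3.1 kB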
"""
ZamAI Simple Multilingual Embeddings Demo
This script demonstrates embedding sentences in multiple languages, including Pashto.
"""
from sentence_transformers import SentenceTransformer
import numpy as np
def cosine_similarity(vec1, vec2):
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector (anything ``np.dot`` and ``np.linalg.norm`` accept).
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The dot product of the two vectors divided by the product of their
        norms. When either vector has zero norm the ratio is undefined, so
        ``0.0`` is returned instead of dividing by zero.
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # A zero-norm vector would turn the division into 0/0, which NumPy
    # evaluates to nan (with a RuntimeWarning); report "no similarity" instead.
    if denominator == 0:
        return 0.0
    return np.dot(vec1, vec2) / denominator
def print_similarities(model, sentences, query_idx=0):
    """Print similarity scores between a query and all other sentences.

    Args:
        model: Object whose ``encode(sentences)`` returns one embedding per
            sentence, indexable by sentence position.
        sentences: Sequence of sentences; the one at ``query_idx`` is the
            query, and every other one is scored against it.
        query_idx: Position of the query sentence. Negative values count
            from the end, as with ordinary sequence indexing.

    Raises:
        IndexError: If ``query_idx`` is out of range (raised by the index
            lookups below).
    """
    # Normalize a negative index up front. The loop below skips the query by
    # comparing against non-negative positions from enumerate(), so without
    # this a negative query_idx would score the query against itself.
    if query_idx < 0:
        query_idx += len(sentences)

    # Get embeddings for all sentences in a single encode call
    embeddings = model.encode(sentences)

    # Get the query embedding and its text
    query_embedding = embeddings[query_idx]
    query = sentences[query_idx]

    print(f"Query: '{query}'")
    print("Similarities:")

    # Calculate similarities with all other sentences
    for i, sentence in enumerate(sentences):
        if i == query_idx:
            continue
        similarity = cosine_similarity(query_embedding, embeddings[i])
        print(f"- {similarity:.4f}: '{sentence}'")

    # Blank line to separate this report from whatever is printed next
    print()
def main():
    """Load the multilingual model and print four similarity comparisons.

    The comparisons are, in order: English vs. English, Pashto vs. Pashto,
    an English query against a mixed English/Pashto list, and a Pashto query
    against that same mixed list.
    """
    model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

    # Load the multilingual model
    print("Loading multilingual embedding model...")
    model = SentenceTransformer(model_name)
    print(f"Model loaded: {model_name}")

    # The two sample sentences are shared between the monolingual and the
    # cross-lingual sentence lists.
    english_sample = "This is a sample sentence in English."
    pashto_sample = "دا په پښتو کې یوه نمونه جمله ده."  # This is a sample sentence in Pashto.

    english_only = [
        english_sample,
        "This sentence is similar to the first one.",
        "This sentence has nothing to do with the others.",
    ]
    pashto_only = [
        pashto_sample,
        "دا جمله د لومړۍ جملې سره ورته ده.",  # This sentence is similar to the first one.
        "دا جمله د نورو سره هېڅ تړاو نلري.",  # This sentence has nothing to do with the others.
    ]
    mixed = [
        english_sample,
        pashto_sample,
        "I'm learning to speak Pashto.",
        "زه د پښتو ژبې زده کړه کوم.",  # I'm learning the Pashto language.
    ]

    # Each entry: (section header, sentence list, position of the query).
    demos = (
        ("\n===== English-English Similarity =====", english_only, 0),
        ("\n===== Pashto-Pashto Similarity =====", pashto_only, 0),
        ("\n===== Cross-lingual Similarity (English-Pashto) =====", mixed, 0),
        ("\n===== Cross-lingual Similarity (Pashto-English) =====", mixed, 1),
    )
    for header, sentence_list, query_position in demos:
        print(header)
        print_similarities(model, sentence_list, query_idx=query_position)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()