"""
ZamAI Simple Multilingual Embeddings Demo

This script demonstrates embedding sentences in multiple languages,
including Pashto.
"""

import numpy as np

try:
    from sentence_transformers import SentenceTransformer
except ImportError as exc:
    # Keep the module importable so the NumPy-only helpers below remain
    # usable; main() re-raises with the original failure as the cause.
    SentenceTransformer = None
    _IMPORT_ERROR = exc
else:
    _IMPORT_ERROR = None


def cosine_similarity(vec1, vec2):
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector (array-like of numbers; intended for 1-D input).
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms. Returns ``0.0`` when either vector has zero norm,
        since the cosine is undefined there and the division would
        otherwise produce NaN.
    """
    dot_product = np.dot(vec1, vec2)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # Zero-length vector: avoid a 0/0 division (NaN + RuntimeWarning).
        return 0.0
    return dot_product / norm_product


def print_similarities(model, sentences, query_idx=0):
    """Print similarity scores between a query and all other sentences.

    Args:
        model: Object exposing ``encode(sentences)`` that returns one
            embedding per sentence, indexable by position.
        sentences: Sequence of sentences to embed.
        query_idx: Position of the query sentence in ``sentences``.
            Negative values count from the end.

    Raises:
        IndexError: If ``query_idx`` is out of range for ``sentences``.
    """
    # Normalise a negative index so the "skip the query itself" check in the
    # loop below (which compares against non-negative positions) still works.
    if query_idx < 0:
        query_idx += len(sentences)

    # Get embeddings for all sentences in a single call.
    embeddings = model.encode(sentences)

    # Pick out the query and its embedding.
    query_embedding = embeddings[query_idx]
    query = sentences[query_idx]

    print(f"Query: '{query}'")
    print("Similarities:")

    # Compare the query against every other sentence.
    for i, sentence in enumerate(sentences):
        if i == query_idx:
            continue
        similarity = cosine_similarity(query_embedding, embeddings[i])
        print(f"- {similarity:.4f}: '{sentence}'")
    print()


def main():
    """Load the multilingual model and print the demo similarity tables.

    Raises:
        ImportError: If the ``sentence_transformers`` package could not be
            imported.
    """
    if SentenceTransformer is None:
        raise ImportError(
            "The 'sentence-transformers' package is required to run this demo."
        ) from _IMPORT_ERROR

    # Load the multilingual model
    print("Loading multilingual embedding model...")
    model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    model = SentenceTransformer(model_name)
    print(f"Model loaded: {model_name}")

    # English-English similarity
    print("\n===== English-English Similarity =====")
    english_sentences = [
        "This is a sample sentence in English.",
        "This sentence is similar to the first one.",
        "This sentence has nothing to do with the others.",
    ]
    print_similarities(model, english_sentences, query_idx=0)

    # Pashto-Pashto similarity
    print("\n===== Pashto-Pashto Similarity =====")
    pashto_sentences = [
        "دا په پښتو کې یوه نمونه جمله ده.",  # This is a sample sentence in Pashto.
        "دا جمله د لومړۍ جملې سره ورته ده.",  # This sentence is similar to the first one.
        "دا جمله د نورو سره هېڅ تړاو نلري.",  # This sentence has nothing to do with the others.
    ]
    print_similarities(model, pashto_sentences, query_idx=0)

    # Cross-lingual similarity (English-Pashto)
    print("\n===== Cross-lingual Similarity (English-Pashto) =====")
    cross_lingual_sentences = [
        "This is a sample sentence in English.",
        "دا په پښتو کې یوه نمونه جمله ده.",  # This is a sample sentence in Pashto.
        "I'm learning to speak Pashto.",
        "زه د پښتو ژبې زده کړه کوم.",  # I'm learning the Pashto language.
    ]
    print_similarities(model, cross_lingual_sentences, query_idx=0)

    # Cross-lingual similarity (Pashto-English): same sentences, Pashto query
    print("\n===== Cross-lingual Similarity (Pashto-English) =====")
    print_similarities(model, cross_lingual_sentences, query_idx=1)


if __name__ == "__main__":
    main()