PyTorch and ONNX format

#13
by kuldiya - opened

When will the ONNX and PyTorch versions of the model become available?

Hi @kuldiya 👋 You can find ONNX weights here: https://hf.co/onnx-community/embeddinggemma-300M-ONNX
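If you want to run those ONNX weights directly, here is a minimal sketch using onnxruntime. It assumes model.onnx (plus its external-data file, if any) has been downloaded locally from that repo; the exact graph input names are not confirmed in this thread, so the dict filter below only feeds inputs the session actually declares:

from transformers import AutoTokenizer
import onnxruntime as ort

# assuming model.onnx (and any external-data file) sits in the current directory
session = ort.InferenceSession("model.onnx")
tokenizer = AutoTokenizer.from_pretrained("onnx-community/embeddinggemma-300M-ONNX")

enc = tokenizer("Which planet is known as the Red Planet?", return_tensors="np")
# keep only the tensors the graph declares as inputs (guards against extras
# like token_type_ids that the export may not accept)
inputs = {k: v for k, v in enc.items() if k in {i.name for i in session.get_inputs()}}
outputs = session.run(None, inputs)
print([o.shape for o in outputs])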

Hi @kuldiya ,

Welcome to Google's Gemma family of open-source models. Could you please confirm whether the above query has been resolved by the comments above? Thanks for your interest and patience.

Thanks.

How can I load this model using AutoModel.from_pretrained, like other embedding models?

from transformers import AutoModel, AutoTokenizer
import torch
import torch.nn.functional as F

class HFEmbeddingModel:
    def __init__(self, model_name_or_path, device=None, normalize=True, pooling="mean"):
        self.model_name_or_path = model_name_or_path
        self.device = device if device is not None else ("cuda" if torch.cuda.is_available() else "cpu")
        self.normalize = normalize
        self.pooling = pooling  # "mean", "cls", or others you want to support

        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        self.model = AutoModel.from_pretrained(model_name_or_path)
        self.model.to(self.device)
        self.model.eval()

    @torch.no_grad()
    def embed(self, texts, max_length=128):
        """
        texts: str or list of str
        returns: tensor of shape (len(texts), hidden_size)
        """
        if isinstance(texts, str):
            texts = [texts]

        enc = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors="pt"
        ).to(self.device)

        model_out = self.model(**enc)

        # pooling
        if self.pooling == "mean":
            # mask-weighted mean over token embeddings
            token_embeddings = model_out.last_hidden_state  # (batch_size, seq_len, hidden_dim)
            attention_mask = enc.attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            sum_embeddings = torch.sum(token_embeddings * attention_mask, dim=1)
            sum_mask = torch.clamp(attention_mask.sum(dim=1), min=1e-9)
            pooled = sum_embeddings / sum_mask

        elif self.pooling == "cls":
            pooled = model_out.last_hidden_state[:, 0]  # first ([CLS]) token

        else:
            raise ValueError(f"Pooling mode {self.pooling} not supported")

        if self.normalize:
            pooled = F.normalize(pooled, p=2, dim=1)

        return pooled.cpu()

    def similarity(self, query_emb, doc_embs):
        """
        Similarity between one query embedding and multiple document embeddings:
        dot product, which equals cosine similarity on normalized embeddings.
        """
        return torch.matmul(query_emb, doc_embs.T)

-- Usage:

model_name = "google/embeddinggemma-300m"  # or your HF checkpoint
embedder = HFEmbeddingModel(model_name_or_path=model_name, normalize=True, pooling="cls")

query = "Which planet is known as the Red Planet?"
documents = [
    "Venus is often called Earth's twin because of its similar size and proximity.",
    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet."
]
query_emb = embedder.embed(query)
doc_embs = embedder.embed(documents)

print(query_emb.shape, doc_embs.shape)  # (1, hidden_dim), (4, hidden_dim)

sims = embedder.similarity(query_emb, doc_embs)
print(sims)

The code above gives completely different similarity scores compared to a sentence-transformers based load.
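For reference, here is a minimal sentence-transformers comparison (a sketch; it assumes sentence-transformers >= 3.x, where model.similarity() exists, and that the checkpoint ships prompt templates named "query" and "document" in its sentence-transformers config). Note that sentence-transformers applies the checkpoint's configured pooling, projection, and prompt templates, which a plain AutoModel load does not, so the two paths are expected to disagree:

from sentence_transformers import SentenceTransformer

st_model = SentenceTransformer("google/embeddinggemma-300m")
# prompt names "query" / "document" are assumed from the checkpoint's config
query_emb = st_model.encode(query, prompt_name="query")
doc_embs = st_model.encode(documents, prompt_name="document")
print(st_model.similarity(query_emb, doc_embs))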

Hi @kuldiya 👋 You can find ONNX weights here: https://hf.co/onnx-community/embeddinggemma-300M-ONNX

I have checked that, and I generated a combined model.onnx file from the model in https://hf.co/onnx-community/embeddinggemma-300M-ONNX with the script below.

import onnx
from onnx.external_data_helper import convert_model_from_external_data

def combine_onnx_files(model_path, output_path):
    try:
        print(f"Loading ONNX model from {model_path}...")
        onnx_model = onnx.load(model_path)

        print("Embedding external data into the ONNX model...")
        convert_model_from_external_data(onnx_model)

        print(f"Saving combined model to {output_path}...")
        onnx.save(onnx_model, output_path)

        print("Successfully combined the ONNX model and its external data.")

    except Exception as e:
        print(f"An error occurred: {e}")
        exit(1)

model_name = "model.onnx"
output_name = "model_combined.onnx"
combine_onnx_files(model_name, output_name)
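As a quick sanity check of the combined file, something like this should work (a sketch; it assumes the onnx and onnxruntime packages are installed):

import onnx
import onnxruntime as ort

# structural validation of the combined model
onnx.checker.check_model("model_combined.onnx")
# confirm the session loads and exposes the expected inputs
session = ort.InferenceSession("model_combined.onnx")
print([i.name for i in session.get_inputs()])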

Is it fine to use the model produced by the above conversion script? Please let me know if there is any issue with this method. Thanks.
