PyTorch and ONNX format.
When will the ONNX and PyTorch versions of the model become available?
Hi @kuldiya,
Welcome to Google's Gemma family of open-source models. Could you please confirm whether your query has been resolved by the comments above? Thanks for your interest and patience.
Thanks.
How can I load this model using AutoModel.from_pretrained like other embedding models?
from transformers import AutoModel, AutoTokenizer
import torch
import torch.nn.functional as F


class HFEmbeddingModel:
    def __init__(self, model_name_or_path, device=None, normalize=True, pooling="mean"):
        self.model_name_or_path = model_name_or_path
        self.device = device if device is not None else ("cuda" if torch.cuda.is_available() else "cpu")
        self.normalize = normalize
        self.pooling = pooling  # "mean", "cls", or others you want to support
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        self.model = AutoModel.from_pretrained(model_name_or_path)
        self.model.to(self.device)
        self.model.eval()

    @torch.no_grad()
    def embed(self, texts, max_length=128):
        """
        texts: str or list of str
        returns: tensor of shape (len(texts), hidden_size)
        """
        if isinstance(texts, str):
            texts = [texts]
        enc = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
        ).to(self.device)
        model_out = self.model(**enc)
        # pooling
        if self.pooling == "mean":
            token_embeddings = model_out.last_hidden_state  # (batch_size, seq_len, hidden_dim)
            attention_mask = enc.attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            sum_embeddings = torch.sum(token_embeddings * attention_mask, dim=1)
            sum_mask = torch.clamp(attention_mask.sum(dim=1), min=1e-9)
            pooled = sum_embeddings / sum_mask
        elif self.pooling == "cls":
            pooled = model_out.last_hidden_state[:, 0]  # [CLS] token
        else:
            raise ValueError(f"Pooling mode {self.pooling} not supported")
        if self.normalize:
            pooled = F.normalize(pooled, p=2, dim=1)
        return pooled.cpu()

    def similarity(self, query_emb, doc_embs):
        """
        Compute similarity between one query embedding and multiple document
        embeddings: a dot product on normalized embeddings equals cosine similarity.
        """
        return torch.matmul(query_emb, doc_embs.T)
# Usage:
model_name = "google/embeddinggemma-300m"  # or your HF checkpoint
embedder = HFEmbeddingModel(model_name_or_path=model_name, normalize=True, pooling="cls")

query = "Which planet is known as the Red Planet?"
documents = [
    "Venus is often called Earth's twin because of its similar size and proximity.",
    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet.",
]

query_emb = embedder.embed(query)
doc_embs = embedder.embed(documents)
print(query_emb.shape, doc_embs.shape)  # (1, hidden_dim), (4, hidden_dim)

sims = embedder.similarity(query_emb, doc_embs)
print(sims)
The code above gives completely different similarity scores compared to loading the model with sentence-transformers.
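For reference, this is a minimal sketch of the sentence-transformers path I am comparing against (assuming sentence-transformers >= 3.0 for the similarity() helper). Note that sentence-transformers applies the pooling and any prompts configured in the checkpoint itself, which likely differs from the pooling="cls" setting used above and could explain the score mismatch:

from sentence_transformers import SentenceTransformer

st_model = SentenceTransformer("google/embeddinggemma-300m")

# encode() applies the pooling / normalization configured in the checkpoint
query_emb = st_model.encode(query)
doc_embs = st_model.encode(documents)

# similarity() defaults to cosine similarity
print(st_model.similarity(query_emb, doc_embs))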
Hi @kuldiya 👋 You can find ONNX weights here: https://hf.co/onnx-community/embeddinggemma-300M-ONNX
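If you want to run the ONNX export directly, here is a minimal sketch with onnxruntime (assumed installed). Input and output names vary per export, so read them from the session instead of hard-coding them:

import onnxruntime as ort

session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
print([i.name for i in session.get_inputs()])   # e.g. input_ids, attention_mask
print([o.name for o in session.get_outputs()])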
I have checked that, and I generated a single combined model file from the model.onnx in https://hf.co/onnx-community/embeddinggemma-300M-ONNX with the script below.
import onnx
from onnx.external_data_helper import convert_model_from_external_data


def combine_onnx_files(model_path, output_path):
    try:
        print(f"Loading ONNX model from {model_path}...")
        onnx_model = onnx.load(model_path)

        print("Embedding external data into the ONNX model...")
        convert_model_from_external_data(onnx_model)

        print(f"Saving combined model to {output_path}...")
        onnx.save(onnx_model, output_path)

        print("Successfully combined the ONNX model and its external data.")
    except Exception as e:
        print(f"An error occurred: {e}")
        exit(1)


model_name = "model.onnx"
output_name = "model_combined.onnx"
combine_onnx_files(model_name, output_name)
Is it fine to use the model produced by the above conversion script? Please let me know if there is any issue with this method. Thanks.
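One way to sanity-check the conversion (a sketch, assuming onnx, onnxruntime, and numpy are installed) is to validate the combined graph and confirm both files produce identical outputs for the same dummy inputs:

import onnx
import onnxruntime as ort
import numpy as np

# check_model accepts a file path, which also handles large models cleanly
onnx.checker.check_model("model_combined.onnx")

sess_a = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
sess_b = ort.InferenceSession("model_combined.onnx", providers=["CPUExecutionProvider"])

# build dummy int64 feeds; symbolic dims (batch, sequence) get small fixed sizes
feeds = {}
for inp in sess_a.get_inputs():
    shape = [d if isinstance(d, int) else (8 if "seq" in str(d).lower() else 1) for d in inp.shape]
    feeds[inp.name] = np.ones(shape, dtype=np.int64)

for a, b in zip(sess_a.run(None, feeds), sess_b.run(None, feeds)):
    np.testing.assert_allclose(a, b, rtol=1e-5, atol=1e-5)
print("Outputs match.")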