grantpitt
/

clip-l336

Feature Extraction

Inference Endpoints

Model card Files Files and versions Community

clip-l336 / handler.py

grantpitt's picture

ass handler

aa4a215 about 2 years ago

history blame contribute delete

1.1 kB

	from typing import Dict, List, Any

	import numpy as np
	import torch
	from transformers import CLIPTokenizer, CLIPModel


	class EndpointHandler:
	    """Turn a text query into a unit-length CLIP text embedding."""

	    def __init__(self, path=""):
	        """Load the CLIP model and its tokenizer.

	        Args:
	            path: Accepted for interface compatibility but not used; the
	                weights are always loaded from the hard-coded ``repo_id``.
	        """
	        repo_id = "openai/clip-vit-large-patch14-336"
	        self.model = CLIPModel.from_pretrained(repo_id)
	        self.tokenizer = CLIPTokenizer.from_pretrained(repo_id)

	    def __call__(self, data: Dict[str, Any]) -> List[float]:
	        """Embed the text found under ``data["inputs"]``.

	        Args:
	            data: Request payload. ``data["inputs"]`` is the text to embed.
	                Only text is supported, since the value goes straight to
	                the tokenizer. If a list of strings is given, only the
	                embedding of the first one is returned.

	        Returns:
	            The L2-normalized text embedding as a flat list of floats.

	        Raises:
	            KeyError: If ``data`` has no ``"inputs"`` entry.
	        """
	        query = data["inputs"]

	        # truncation=True clips over-long prompts to the tokenizer's
	        # maximum length instead of letting the model fail on them.
	        inputs = self.tokenizer(
	            query, padding=True, truncation=True, return_tensors="pt"
	        )

	        # Pure inference: skip autograd bookkeeping for the forward pass.
	        with torch.no_grad():
	            text_features = self.model.get_text_features(**inputs)

	        # Row 0 is the embedding of the (first) query.
	        input_embedding = text_features.numpy()[0]

	        # Scale to unit length.
	        input_embedding = input_embedding / np.linalg.norm(input_embedding)

	        return input_embedding.tolist()