# Hugging Face Space: CLIP image–text similarity demo.
import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Load the pretrained CLIP model and its matching processor once at startup
# so every inference request reuses the same weights.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
def clip_inference(input_img, input_text):
    """Score how well an image matches each comma-separated text prompt.

    Args:
        input_img: Image supplied by the Gradio Image component.
        input_text: Comma-separated text prompts to compare against the image.

    Returns:
        A comma-separated string of softmax probabilities, one per prompt,
        in the same order as the prompts appear in ``input_text``.
    """
    # Split the single textbox value into individual prompts.
    text_entries = [text.strip() for text in input_text.split(",")]
    # Tokenize the prompts and preprocess the image into model-ready tensors.
    inputs = processor(text=text_entries, images=input_img, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    # logits_per_image holds image-to-text similarity scores, shape (1, num_prompts).
    logits_per_image = outputs.logits_per_image
    # Softmax across prompts turns raw scores into a probability distribution.
    probs = logits_per_image.softmax(dim=1)
    return ', '.join(str(prob.item()) for prob in probs[0])
title = "CLIP OpenAI Model"
description = "Find similarity between images and multiple text entries (separated by commas)."

# One comma-separated prompt pair per bundled example image:
# examples/images_<i>.jpg pairs with the i-th prompt string below.
text_examples = [
    "a sky with full of stars, painting image",
    "a dog playing in the garden, a dog sleeping in the garden",
    "a small girl dancing, a small girl playing guitar",
    "a small family cooking in the kitchen,family watching the movie",
    "students inside the class,students playing in the ground ",
    "a traffic signal, a lot of cars",
    "a theatre, a football stadium",
    "group of animals, group of birds",
    "yellow sunflowers, red roses",
    "sunset across the lake, sky with full of stars",
]
# Each Gradio example is an [image_path, prompt_string] pair.
examples = [[f"examples/images_{i}.jpg", text] for i, text in enumerate(text_examples)]
# Wire the inference function into a simple Gradio UI: an image plus a
# free-text prompt box in, a textbox of per-prompt probabilities out.
demo = gr.Interface(
    clip_inference,
    inputs=[
        gr.Image(label="Input image"),
        gr.Textbox(placeholder="Input text : Multiple entries separated by commas"),
    ],
    outputs=[gr.Textbox(label="similarity scores")],
    title=title,
    description=description,
    examples=examples,
)

demo.launch()