import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel

# Load the CLIP model and its matching processor from the Hugging Face Hub
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def clip_inference(input_img, input_text):
    # Split the comma-separated input into a list of text entries
    text_entries = [text.strip() for text in input_text.split(",")]
    # Tokenize the text entries and preprocess the image for CLIP
    inputs = processor(text=text_entries, images=input_img, return_tensors="pt", padding=True)
    # Run the model without tracking gradients (inference only)
    with torch.no_grad():
        outputs = model(**inputs)
    # logits_per_image holds one image-text similarity score per text entry;
    # a softmax over the text dimension turns the scores into probabilities
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    # Format the probabilities as a comma-separated string
    output_prob = ", ".join(str(prob.item()) for prob in probs[0])
    return output_prob
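
# For reference, clip_inference can also be called outside Gradio. A minimal
# sketch, assuming an image file "examples/images_0.jpg" exists next to this
# script (the same files the examples list below expects):
#
#   from PIL import Image
#   img = Image.open("examples/images_0.jpg")
#   print(clip_inference(img, "a sky full of stars, a painting"))
#   # -> one probability per text entry, e.g. "0.98..., 0.02..."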

title = "CLIP OpenAI Model"
description = "Find the similarity between an image and multiple text entries (separated by commas)."
text_examples = [
    "a sky full of stars, a painting",
    "a dog playing in the garden, a dog sleeping in the garden",
    "a small girl dancing, a small girl playing guitar",
    "a small family cooking in the kitchen, a family watching a movie",
    "students inside the classroom, students playing on the playground",
    "a traffic signal, a lot of cars",
    "a theatre, a football stadium",
    "a group of animals, a group of birds",
    "yellow sunflowers, red roses",
    "sunset across the lake, a sky full of stars",
]
examples = [["examples/images_" + str(i) + ".jpg", text] for i, text in enumerate(text_examples)]
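# Note: the comprehension above assumes files examples/images_0.jpg through
# examples/images_9.jpg are present in the repository, one per text example.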

demo = gr.Interface(
    fn=clip_inference,
    inputs=[
        gr.Image(label="Input image"),
        gr.Textbox(label="Input text", placeholder="Multiple entries separated by commas"),
    ],
    outputs=[gr.Textbox(label="Similarity scores")],
    title=title,
    description=description,
    examples=examples,
)
demo.launch()