# Hugging Face Space: CLIP image–text similarity demo.
import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Load the pretrained CLIP model and its matching processor once at startup
# so every inference request reuses the same weights.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
def clip_inference(input_img, input_text):
    """Score how well an image matches each comma-separated text prompt.

    Args:
        input_img: Image supplied by the Gradio Image component.
        input_text: Comma-separated text prompts to compare against the image.

    Returns:
        A comma-separated string of softmax probabilities, one per prompt,
        in the same order as the prompts appear in ``input_text``.
    """
    # Split the single textbox value into individual prompts.
    text_entries = [text.strip() for text in input_text.split(",")]
    # Tokenize the prompts and preprocess the image into model-ready tensors.
    inputs = processor(text=text_entries, images=input_img, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    # logits_per_image holds image-to-text similarity scores, shape (1, num_prompts).
    logits_per_image = outputs.logits_per_image
    # Softmax across prompts turns raw scores into a probability distribution.
    probs = logits_per_image.softmax(dim=1)
    return ', '.join(str(prob.item()) for prob in probs[0])
title = "CLIP OpenAI Model"
description = "Find similarity between images and multiple text entries (separated by commas)."

# One comma-separated prompt pair per bundled example image:
# examples/images_<i>.jpg pairs with the i-th prompt string below.
text_examples = [
    "a sky with full of stars, painting image",
    "a dog playing in the garden, a dog sleeping in the garden",
    "a small girl dancing, a small girl playing guitar",
    "a small family cooking in the kitchen,family watching the movie",
    "students inside the class,students playing in the ground ",
    "a traffic signal, a lot of cars",
    "a theatre, a football stadium",
    "group of animals, group of birds",
    "yellow sunflowers, red roses",
    "sunset across the lake, sky with full of stars",
]
# Each Gradio example is an [image_path, prompt_string] pair.
examples = [[f"examples/images_{i}.jpg", text] for i, text in enumerate(text_examples)]
# Wire the inference function into a simple Gradio UI: an image plus a
# free-text prompt box in, a textbox of per-prompt probabilities out.
demo = gr.Interface(
    clip_inference,
    inputs=[
        gr.Image(label="Input image"),
        gr.Textbox(placeholder="Input text : Multiple entries separated by commas"),
    ],
    outputs=[gr.Textbox(label="similarity scores")],
    title=title,
    description=description,
    examples=examples,
)

demo.launch()