Spaces:

DanielIglesias97
/

CLIP_Images_Embeddings

Sleeping

App Files Files Community

DanielIglesias97 committed on Feb 24

Commit

e93b7b1

1 Parent(s): 1e751ff

First upload to the repository of the CLIP Embeddings extractor.

Browse files

Files changed (6) hide show

Dockerfile +26 -0
app.py +40 -0
config.cfg +3 -0
requirements.txt +3 -0
search_engine_model.py +64 -0
test_search_engine_model.py +10 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,26 @@

+# Use the official PyTorch runtime image (CUDA 12.4, cuDNN 9) as the base image
+FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime AS base
+# git is required because requirements.txt installs CLIP straight from its GitHub repository.
+# The apt package lists are removed in the same layer so they do not end up in the image.
+RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+# Create and switch to an unprivileged user (UID 1000)
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# Set the working directory in the container
+WORKDIR $HOME/app
+# Install the dependencies before copying the sources, so this layer is reused until requirements.txt changes
+COPY --chown=user requirements.txt $HOME/app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the current directory contents into the container at $HOME/app
+COPY --chown=user . $HOME/app
+# "debug" target: run the model module under the Python debugger
+FROM base AS debug
+CMD ["python", "-m", "pdb", "search_engine_model.py"]
+# "run" target: start the Gradio application
+FROM base AS run
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import configparser
+import gradio as gr
+import numpy as np
+from search_engine_model import SearchEngineModel
+def get_image_embeddings(input_image_paths_list):
+    search_engine_model = SearchEngineModel()
+    model, preprocess = search_engine_model.load_clip_model()
+    image_embeddings_list = []
+    for current_input_image_path_aux in input_image_paths_list:
+        current_image_embeddings = search_engine_model.encode_image(model, preprocess, current_input_image_path_aux)
+        image_embeddings_list.append(current_image_embeddings.values[0])
+    image_embeddings_np = np.array(image_embeddings_list)
+    return image_embeddings_np
+def main():
+    config_manager_obj = configparser.ConfigParser()
+    config_manager_obj.read('./config.cfg')
+    main_app = gr.Interface(
+        fn=get_image_embeddings,
+        inputs=[
+            gr.File(label="Upload Image", file_count="multiple"),
+        ],
+        outputs=[
+            gr.Dataframe(type='numpy'),
+        ],
+        title="CLIP Image Embeddings",
+        description="Obtain the embeddings of the input images",
+        flagging_mode="never"
+    )
+    HOST_IP_ADDRESS = config_manager_obj['SERVER']['HOST_IP_ADDRESS']
+    PORT_NUMBER = int(config_manager_obj['SERVER']['PORT_NUMBER'])
+    main_app.launch(server_name=HOST_IP_ADDRESS, server_port=PORT_NUMBER)
+main()

config.cfg ADDED Viewed

	@@ -0,0 +1,3 @@

+[SERVER]
+HOST_IP_ADDRESS = 0.0.0.0
+PORT_NUMBER = 8000

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==5.12.0
+pandas==2.2.3
+git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1

search_engine_model.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import clip
+import logging
+import os
+import pandas as pd
+from PIL import Image
+import random
+import torch
+class SearchEngineModel():
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        logging.basicConfig(level=logging.INFO)
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model, self.preprocess = self.load_clip_model()
+    def load_clip_model(self):
+        model, preprocess = clip.load("ViT-B/32", device=self.device)
+        return model, preprocess
+    def read_image(self, image_path):
+        pil_image = Image.open(image_path)
+        return pil_image
+    def encode_image(self, model, preprocess, image_path):
+        image = preprocess(Image.open(image_path)).unsqueeze(0).to(self.device)
+        with torch.no_grad():
+            image_features = model.encode_image(image)
+        image_features = pd.DataFrame(image_features.numpy())
+        return image_features
+    def __search_image_auxiliar_func__(self, prompt_features, nofimages_to_show):
+        encoded_images, image_paths = self.encode_images(self.model, self.preprocess, self.image_root_dir, self.csv_file_path)
+        similarity = encoded_images @ prompt_features.T
+        values, indices = similarity.topk(nofimages_to_show, dim=0)
+        results = []
+        for value, index in zip(values, indices):
+            results.append(image_paths[index])
+        return results
+    def search_image_by_text_prompt(self, text_prompt, nofimages_to_show):
+        query = clip.tokenize([text_prompt]).to(self.device)
+        with torch.no_grad():
+            text_features = self.model.encode_text(query)
+        search_results = self.__search_image_auxiliar_func__(text_features, nofimages_to_show)
+        return search_results
+    def search_image_by_image_prompt(self, image_prompt, nofimages_to_show):
+        image = self.preprocess(image_prompt).unsqueeze(0).to(self.device)
+        with torch.no_grad():
+            image_features = self.model.encode_image(image)
+        search_results = self.__search_image_auxiliar_func__(image_features, nofimages_to_show)
+        return search_results

test_search_engine_model.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from search_engine_model import SearchEngineModel
+def main():
+    search_engine_model = SearchEngineModel()
+    model, preprocess = search_engine_model.load_clip_model()
+    input_image_path = './cat.jpg'
+    image_embeddings = search_engine_model.encode_image(model, preprocess, input_image_path)
+main()