Commit
·
e93b7b1
1
Parent(s):
1e751ff
First upload to the repository of the CLIP Embeddings extractor.
Browse files- Dockerfile +26 -0
- app.py +40 -0
- config.cfg +3 -0
- requirements.txt +3 -0
- search_engine_model.py +64 -0
- test_search_engine_model.py +10 -0
Dockerfile
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base image: the official PyTorch runtime image (tag 2.5.1-cuda12.4-cudnn9-runtime)
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime AS base

# git is required because requirements.txt installs CLIP from a git+https URL.
# ca-certificates is listed explicitly so HTTPS clones keep working when
# recommended packages are skipped; the apt lists are removed afterwards to
# keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Run the application as an unprivileged user (uid 1000)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory in the container
WORKDIR $HOME/app

# Install the dependencies before copying the sources, so this layer stays
# cached when only application code changes.
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at $HOME/app
COPY --chown=user . $HOME/app

# Debug target: start the search engine module under the Python debugger
FROM base AS debug
CMD ["python", "-m", "pdb", "search_engine_model.py"]

# Run target (last stage, hence the default build target): start the Gradio app
FROM base AS run
CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import configparser
|
2 |
+
import gradio as gr
|
3 |
+
import numpy as np
|
4 |
+
from search_engine_model import SearchEngineModel
|
5 |
+
|
6 |
+
def get_image_embeddings(input_image_paths_list):
    """Compute one CLIP embedding per input image.

    Args:
        input_image_paths_list: Image file paths to encode, one embedding
            each. May be ``None`` or empty when nothing was provided.

    Returns:
        A NumPy array built from the list of per-image embedding rows; an
        empty array when there is no input.
    """
    # Nothing to encode: return early so the model is not loaded for nothing
    # and a ``None`` input does not crash the loop below.
    if not input_image_paths_list:
        return np.array([])

    search_engine_model = SearchEngineModel()
    # The constructor already loads the CLIP model; reuse it instead of
    # calling load_clip_model() a second time.
    model = search_engine_model.model
    preprocess = search_engine_model.preprocess

    # encode_image returns a one-row DataFrame; keep that row as a flat vector.
    image_embeddings_list = [
        search_engine_model.encode_image(model, preprocess, image_path).values[0]
        for image_path in input_image_paths_list
    ]

    return np.array(image_embeddings_list)
|
18 |
+
|
19 |
+
def main():
    """Build the Gradio interface and serve it on the configured address.

    The host and port are read from the ``[SERVER]`` section of
    ``./config.cfg`` (resolved relative to the current working directory).

    Raises:
        KeyError: If the ``SERVER`` section or one of its keys is missing,
            which is also what happens when the config file cannot be read.
        ValueError: If ``PORT_NUMBER`` is not an integer.
    """
    config_manager_obj = configparser.ConfigParser()
    config_manager_obj.read('./config.cfg')

    main_app = gr.Interface(
        fn=get_image_embeddings,
        inputs=[
            gr.File(label="Upload Image", file_count="multiple"),
        ],
        outputs=[
            gr.Dataframe(type='numpy'),
        ],
        title="CLIP Image Embeddings",
        description="Obtain the embeddings of the input images",
        flagging_mode="never",
    )

    server_config = config_manager_obj['SERVER']
    host_ip_address = server_config['HOST_IP_ADDRESS']
    port_number = int(server_config['PORT_NUMBER'])
    main_app.launch(server_name=host_ip_address, server_port=port_number)


# Only start the server when executed as a script (e.g. ``python app.py``),
# not as a side effect of importing this module.
if __name__ == "__main__":
    main()
|
config.cfg
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[SERVER]
|
2 |
+
HOST_IP_ADDRESS = 0.0.0.0
|
3 |
+
PORT_NUMBER = 8000
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio==5.12.0
|
2 |
+
pandas==2.2.3
|
3 |
+
git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
|
search_engine_model.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import clip
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
import pandas as pd
|
5 |
+
from PIL import Image
|
6 |
+
import random
|
7 |
+
import torch
|
8 |
+
|
9 |
+
class SearchEngineModel():
    """Wrapper around the CLIP ``ViT-B/32`` model for embeddings and search."""

    def __init__(self):
        """Select the compute device and load the CLIP model onto it."""
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.INFO)

        # Prefer the GPU when one is available.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model, self.preprocess = self.load_clip_model()

    def load_clip_model(self):
        """Load CLIP ``ViT-B/32`` on ``self.device``.

        Returns:
            The ``(model, preprocess)`` pair returned by ``clip.load``.
        """
        model, preprocess = clip.load("ViT-B/32", device=self.device)

        return model, preprocess

    def read_image(self, image_path):
        """Open the image at ``image_path`` and return the PIL image."""
        pil_image = Image.open(image_path)

        return pil_image

    def encode_image(self, model, preprocess, image_path):
        """Encode a single image file into its CLIP embedding.

        Args:
            model: Object exposing ``encode_image`` (the CLIP model).
            preprocess: Callable turning a PIL image into a tensor.
            image_path: Path of the image file to encode.

        Returns:
            A pandas DataFrame holding the embedding produced for the image.
        """
        # Close the file handle as soon as the image has been preprocessed.
        with Image.open(image_path) as pil_image:
            image = preprocess(pil_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            image_features = model.encode_image(image)

        # Tensors living on a CUDA device cannot be converted to NumPy
        # directly; move them to host memory first (no-op on CPU).
        image_features = pd.DataFrame(image_features.cpu().numpy())

        return image_features

    def __search_image_auxiliar_func__(self, prompt_features, nofimages_to_show):
        """Return the paths of the images most similar to ``prompt_features``.

        Args:
            prompt_features: Feature tensor of the query prompt.
            nofimages_to_show: Maximum number of results to return.

        Returns:
            A list of image paths ordered from most to least similar.
        """
        # NOTE(review): ``encode_images``, ``image_root_dir`` and
        # ``csv_file_path`` are not defined in this class as visible here;
        # they must be provided elsewhere for the search methods to work.
        # Presumably ``encode_images`` returns (feature tensor, paths) - confirm.
        encoded_images, image_paths = self.encode_images(self.model, self.preprocess, self.image_root_dir, self.csv_file_path)
        similarity = encoded_images @ prompt_features.T

        # topk raises when asked for more entries than exist, so clamp the
        # request to the number of encoded images.
        top_k = min(nofimages_to_show, similarity.shape[0])
        _, indices = similarity.topk(top_k, dim=0)

        # Each row of ``indices`` holds the image index for the single prompt.
        return [image_paths[int(index)] for index in indices]

    def search_image_by_text_prompt(self, text_prompt, nofimages_to_show):
        """Search images using a text prompt.

        Args:
            text_prompt: Query text to tokenize and encode with CLIP.
            nofimages_to_show: Maximum number of results to return.

        Returns:
            A list of image paths ordered from most to least similar.
        """
        query = clip.tokenize([text_prompt]).to(self.device)
        with torch.no_grad():
            text_features = self.model.encode_text(query)

        search_results = self.__search_image_auxiliar_func__(text_features, nofimages_to_show)

        return search_results

    def search_image_by_image_prompt(self, image_prompt, nofimages_to_show):
        """Search images using an image prompt.

        Args:
            image_prompt: Image object accepted by ``self.preprocess``.
            nofimages_to_show: Maximum number of results to return.

        Returns:
            A list of image paths ordered from most to least similar.
        """
        image = self.preprocess(image_prompt).unsqueeze(0).to(self.device)
        with torch.no_grad():
            image_features = self.model.encode_image(image)

        search_results = self.__search_image_auxiliar_func__(image_features, nofimages_to_show)

        return search_results
|
test_search_engine_model.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from search_engine_model import SearchEngineModel
|
2 |
+
|
3 |
+
def main():
    """Smoke-run the encoder on ``./cat.jpg`` and return its embedding.

    Returns:
        The value returned by ``SearchEngineModel.encode_image`` for the
        sample image.
    """
    search_engine_model = SearchEngineModel()

    # The constructor already loads the CLIP model; reuse it rather than
    # loading a second copy.
    model = search_engine_model.model
    preprocess = search_engine_model.preprocess

    input_image_path = './cat.jpg'
    image_embeddings = search_engine_model.encode_image(model, preprocess, input_image_path)

    return image_embeddings


# Run only when executed as a script, so importing this module (for example
# during test collection) does not load the model or read the image.
if __name__ == "__main__":
    main()
|