DanielIglesias97 committed
Commit e93b7b1 · 1 Parent(s): 1e751ff

First upload to the repository of the CLIP Embeddings extractor.

Files changed (6)
  1. Dockerfile +26 -0
  2. app.py +40 -0
  3. config.cfg +3 -0
  4. requirements.txt +3 -0
  5. search_engine_model.py +64 -0
  6. test_search_engine_model.py +10 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
+ # Use the official PyTorch runtime (CUDA 12.4) as the base image
+ FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime AS base
+
+ # git is required so that pip can install the CLIP dependency from GitHub (see requirements.txt)
+ RUN apt-get update && apt-get install -y git
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory in the container
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app
+ COPY --chown=user . $HOME/app
+
+ RUN pip install -r requirements.txt
+
+ FROM base AS debug
+
+ CMD ["python", "-m", "pdb", "search_engine_model.py"]
+
+ FROM base AS run
+
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,40 @@
+ import configparser
+ import gradio as gr
+ import numpy as np
+ from search_engine_model import SearchEngineModel
+
+ def get_image_embeddings(input_image_paths_list):
+     search_engine_model = SearchEngineModel()
+
+     model, preprocess = search_engine_model.load_clip_model()
+     image_embeddings_list = []
+     for current_input_image_path_aux in input_image_paths_list:
+         current_image_embeddings = search_engine_model.encode_image(model, preprocess, current_input_image_path_aux)
+         image_embeddings_list.append(current_image_embeddings.values[0])
+
+     image_embeddings_np = np.array(image_embeddings_list)
+
+     return image_embeddings_np
+
+ def main():
+     config_manager_obj = configparser.ConfigParser()
+     config_manager_obj.read('./config.cfg')
+
+     main_app = gr.Interface(
+         fn=get_image_embeddings,
+         inputs=[
+             gr.File(label="Upload Image", file_count="multiple"),
+         ],
+         outputs=[
+             gr.Dataframe(type='numpy'),
+         ],
+         title="CLIP Image Embeddings",
+         description="Obtain the embeddings of the input images",
+         flagging_mode="never"
+     )
+
+     HOST_IP_ADDRESS = config_manager_obj['SERVER']['HOST_IP_ADDRESS']
+     PORT_NUMBER = int(config_manager_obj['SERVER']['PORT_NUMBER'])
+     main_app.launch(server_name=HOST_IP_ADDRESS, server_port=PORT_NUMBER)
+
+ main()
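app.py exposes a single Gradio interface whose endpoint takes one or more uploaded images and returns their embeddings as a dataframe with one 512-dimensional row per image (ViT-B/32). Note that `main()` is called at module scope rather than behind an `if __name__ == "__main__":` guard, so `python app.py` (the Dockerfile's `run` CMD) starts the server immediately, but importing `app` from other code will also start it. Below is a rough client-side sketch, assuming the server is already running on localhost:8000; `gradio_client` is not pinned in requirements.txt and the exact call signature can vary between versions, so treat it as illustrative only.

```python
# Illustrative only: query the running embeddings server from another process.
# Assumes `pip install gradio_client` and an image at ./cat.jpg.
from gradio_client import Client, handle_file

client = Client("http://localhost:8000")
result = client.predict([handle_file("./cat.jpg")], api_name="/predict")
print(result)  # dataframe-style payload with one embedding row per uploaded image
```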
config.cfg ADDED
@@ -0,0 +1,3 @@
+ [SERVER]
+ HOST_IP_ADDRESS = 0.0.0.0
+ PORT_NUMBER = 8000
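`HOST_IP_ADDRESS = 0.0.0.0` binds the Gradio server to all interfaces inside the container, which is what makes it reachable through a published Docker port; `PORT_NUMBER` is read by app.py and should match the container port published with `docker run -p`.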
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ gradio==5.12.0
+ pandas==2.2.3
+ git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
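torch itself is not listed because it is already provided by the `pytorch/pytorch:2.5.1` base image; CLIP is installed straight from GitHub, pinned to a specific commit, which is why the Dockerfile installs git before running `pip install -r requirements.txt`.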
search_engine_model.py ADDED
@@ -0,0 +1,64 @@
+ import clip
+ import logging
+ import os
+ import pandas as pd
+ from PIL import Image
+ import random
+ import torch
+
+ class SearchEngineModel():
+
+     def __init__(self):
+         self.logger = logging.getLogger(__name__)
+         logging.basicConfig(level=logging.INFO)
+
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model, self.preprocess = self.load_clip_model()
+
+     def load_clip_model(self):
+         model, preprocess = clip.load("ViT-B/32", device=self.device)
+
+         return model, preprocess
+
+     def read_image(self, image_path):
+         pil_image = Image.open(image_path)
+
+         return pil_image
+
+     def encode_image(self, model, preprocess, image_path):
+         image = preprocess(Image.open(image_path)).unsqueeze(0).to(self.device)
+         with torch.no_grad():
+             image_features = model.encode_image(image)
+
+         image_features = pd.DataFrame(image_features.cpu().numpy())  # .cpu() so this also works when self.device == "cuda"
+
+         return image_features
+
+     def __search_image_auxiliar_func__(self, prompt_features, nofimages_to_show):
+         encoded_images, image_paths = self.encode_images(self.model, self.preprocess, self.image_root_dir, self.csv_file_path)
+         similarity = encoded_images @ prompt_features.T
+         values, indices = similarity.topk(nofimages_to_show, dim=0)
+
+         results = []
+         for value, index in zip(values, indices):
+             results.append(image_paths[index])
+
+         return results
+
+     def search_image_by_text_prompt(self, text_prompt, nofimages_to_show):
+         query = clip.tokenize([text_prompt]).to(self.device)
+         with torch.no_grad():
+             text_features = self.model.encode_text(query)
+
+         search_results = self.__search_image_auxiliar_func__(text_features, nofimages_to_show)
+
+         return search_results
+
+     def search_image_by_image_prompt(self, image_prompt, nofimages_to_show):
+         image = self.preprocess(image_prompt).unsqueeze(0).to(self.device)
+         with torch.no_grad():
+             image_features = self.model.encode_image(image)
+
+         search_results = self.__search_image_auxiliar_func__(image_features, nofimages_to_show)
+
+         return search_results
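`encode_image` returns a one-row pandas DataFrame holding the 512-dimensional ViT-B/32 embedding, and it is the only path exercised by app.py. The search helpers (`__search_image_auxiliar_func__` and the two `search_image_by_*` methods) additionally rely on `self.encode_images`, `self.image_root_dir` and `self.csv_file_path`, none of which are defined in this commit, so they will raise `AttributeError` until those are added. A minimal sketch that uses only the methods that do exist, comparing two images by cosine similarity of their embeddings; the image paths are placeholders:

```python
# Rough sketch: cosine similarity between two image embeddings, using only
# methods defined in this commit. The image paths are placeholders.
import numpy as np
from search_engine_model import SearchEngineModel

engine = SearchEngineModel()
emb_a = engine.encode_image(engine.model, engine.preprocess, "./cat.jpg").values[0]
emb_b = engine.encode_image(engine.model, engine.preprocess, "./dog.jpg").values[0]

cosine = np.dot(emb_a, emb_b) / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b))
print(f"cosine similarity: {cosine:.3f}")
```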
test_search_engine_model.py ADDED
@@ -0,0 +1,10 @@
+ from search_engine_model import SearchEngineModel
+
+ def main():
+     search_engine_model = SearchEngineModel()
+
+     model, preprocess = search_engine_model.load_clip_model()
+     input_image_path = './cat.jpg'
+     image_embeddings = search_engine_model.encode_image(model, preprocess, input_image_path)
+
+ main()
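The test script exercises the embedding path end to end but makes no assertions and assumes a `./cat.jpg` next to it (the image is not part of this commit). A pytest-style variant that also checks the embedding shape could look like the following sketch, using 512 as the known output dimension of ViT-B/32; it still assumes the same local image.

```python
# Sketch of a pytest-style check; still assumes ./cat.jpg exists alongside the test.
from search_engine_model import SearchEngineModel

def test_encode_image_shape():
    engine = SearchEngineModel()
    embeddings = engine.encode_image(engine.model, engine.preprocess, "./cat.jpg")
    assert embeddings.shape == (1, 512)  # ViT-B/32 produces 512-dimensional embeddings
```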