DanielIglesias97 committed
Commit 08614a1 · 1 Parent(s): 6b276dc

First upload of the code to the repo of CLIP_Text_Embeddings.

Files changed (7)
  1. Dockerfile +26 -0
  2. README.md +5 -5
  3. app.py +51 -0
  4. config.cfg +3 -0
  5. requirements.txt +3 -0
  6. search_engine_model.py +66 -0
  7. test_search_engine_model.py +18 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
+ # Use an official Python runtime as the base image
+ FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime AS base
+
+ # Git is needed so that pip can install the CLIP dependency from GitHub (see requirements.txt)
+ RUN apt-get update && apt-get install -y git
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory in the container
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app
+ COPY --chown=user . $HOME/app
+
+ RUN pip install -r requirements.txt
+
+ FROM base AS debug
+
+ CMD ["python", "-m", "pdb", "test_search_engine_model.py"]
+
+ FROM base AS run
+
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,11 @@
  ---
- title: CLIP Text Embeddings
- emoji: 🏢
- colorFrom: purple
- colorTo: purple
+ title: CLIP Embeddings
+ emoji: 👀
+ colorFrom: pink
+ colorTo: yellow
  sdk: docker
  pinned: false
- short_description: ' Obtain the embeddings of a given text'
+ short_description: Image embeddings extractor using the OpenAI CLIP model
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,51 @@
+ import configparser
+ import gradio as gr
+ import numpy as np
+ import pandas as pd
+ from search_engine_model import SearchEngineModel
+
+ def get_text_embeddings(text_prompt, input_np_array):
+     search_engine_model = SearchEngineModel()
+
+     model, _ = search_engine_model.load_clip_model()
+     text_embeddings = search_engine_model.encode_text(model, text_prompt)
+
+     input_df = pd.DataFrame(input_np_array)
+     search_result = search_engine_model.search_image_by_text_prompt(text_embeddings, input_df)
+
+     return text_embeddings, search_result
+
+ def main():
+     config_manager_obj = configparser.ConfigParser()
+     config_manager_obj.read('./config.cfg')
+
+     random_features = np.random.rand(50, 512)  # placeholder features matching CLIP ViT-B/32's 512-dim embeddings
+     initial_dataframe = pd.DataFrame(random_features)
+     names_column = [f'image_{it}.png' for it in range(len(random_features))]
+     initial_dataframe.insert(0, 'images_names', names_column)
+
+     main_app = gr.Interface(
+         fn=get_text_embeddings,
+         inputs=[
+             gr.Textbox(),
+             gr.Dataframe(
+                 initial_dataframe.values,
+                 headers=['images_names'] + [f'feature_{it}' for it in range(random_features.shape[1])],
+                 type='numpy',
+                 interactive=False
+             )
+         ],
+         outputs=[
+             gr.Dataframe(type='numpy', headers=[f'feature_{it}' for it in range(random_features.shape[1])]),
+             gr.Dataframe(type='numpy', headers=['images_names', 'similarity'])
+         ],
+         title="CLIP Text Embeddings",
+         description="Obtain the embeddings of a given text and use the API to compare them with a set of image embeddings.",
+         flagging_mode="never"
+     )
+
+     HOST_IP_ADDRESS = config_manager_obj['SERVER']['HOST_IP_ADDRESS']
+     PORT_NUMBER = int(config_manager_obj['SERVER']['PORT_NUMBER'])
+     main_app.launch(server_name=HOST_IP_ADDRESS, server_port=PORT_NUMBER)
+
+ if __name__ == '__main__':
+     main()
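Since the Gradio interface just wraps get_text_embeddings, the same function can be exercised without the UI. The following is a hypothetical smoke test (not part of the commit; it assumes app.py is importable from the working directory and that the CLIP weights can be downloaded). The input layout follows the code above: column 0 holds the image name, the remaining 512 columns hold the feature values.

    # Hypothetical local check of app.get_text_embeddings; shapes follow the code above.
    import numpy as np
    from app import get_text_embeddings

    features = np.random.rand(5, 512)
    rows = np.array([[f'image_{i}.png', *features[i]] for i in range(5)], dtype=object)

    embeddings, results = get_text_embeddings('a photo of a cat', rows)
    print(embeddings.shape)  # (1, 512): one CLIP ViT-B/32 text embedding
    print(results[:3])       # [images_names, similarity] rows, most similar fake images first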
config.cfg ADDED
@@ -0,0 +1,3 @@
+ [SERVER]
+ HOST_IP_ADDRESS = 0.0.0.0
+ PORT_NUMBER = 7860
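For reference, configparser returns every value as a string, which is why app.py casts PORT_NUMBER to int before passing it to launch(). A minimal sketch of how this file is consumed (it mirrors app.py; nothing beyond the two keys above is assumed):

    # Minimal sketch of reading config.cfg with the standard library; values come back as strings.
    import configparser

    config = configparser.ConfigParser()
    config.read('./config.cfg')
    host = config['SERVER']['HOST_IP_ADDRESS']   # '0.0.0.0'
    port = int(config['SERVER']['PORT_NUMBER'])  # 7860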
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ gradio==5.12.0
+ pandas==2.2.3
+ git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
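torch itself is not pinned here because it ships with the pytorch/pytorch base image from the Dockerfile, while CLIP is installed from a pinned GitHub commit (hence the git install in the Dockerfile). A quick sanity check, assuming the pinned CLIP package installed cleanly, is to confirm that the model name loaded by search_engine_model.py is exposed:

    # Sanity check for the CLIP install (assumes the pinned package built successfully).
    import clip
    print(clip.available_models())  # expected to include 'ViT-B/32', the model loaded in search_engine_model.py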
search_engine_model.py ADDED
@@ -0,0 +1,66 @@
+ import clip
+ import logging
+ import os
+ import pandas as pd
+ from PIL import Image
+ import random
+ import torch
+
+ class SearchEngineModel():
+
+     def __init__(self):
+         self.logger = logging.getLogger(__name__)
+         logging.basicConfig(level=logging.INFO)
+
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model, self.preprocess = self.load_clip_model()
+
+     def load_clip_model(self):
+         model, preprocess = clip.load("ViT-B/32", device=self.device)
+
+         return model, preprocess
+
+     def read_image(self, image_path):
+         pil_image = Image.open(image_path)
+
+         return pil_image
+
+     def encode_text(self, model, text_prompt):
+         query = clip.tokenize([text_prompt]).to(self.device)
+         with torch.no_grad():
+             text_features = self.model.encode_text(query)
+             text_features = text_features.cpu().numpy()  # move to CPU first so this also works on CUDA
+
+         return text_features
+
+     def __search_image_auxiliar_func__(self, prompt_features, nofimages_to_show):
+         encoded_images, image_paths = self.encode_images(self.model, self.preprocess, self.image_root_dir, self.csv_file_path)  # assumes these attributes are provided elsewhere
+         similarity = encoded_images @ prompt_features.T
+         values, indices = similarity.topk(nofimages_to_show, dim=0)
+
+         results = []
+         for value, index in zip(values, indices):
+             results.append(image_paths[index])
+
+         return results
+
+     def search_image_by_text_prompt(self, text_features, images_features):
+         names_column = images_features.values[:, 0]  # first column holds the image names
+         search_results = images_features.values[:, 1:].astype(float) @ text_features.T
+
+         search_results_df = pd.DataFrame(search_results)
+         search_results_df.insert(0, "images_names", names_column)
+         search_results_df.columns = ['images_names', 'similarity']
+         search_results_df = search_results_df.sort_values(by='similarity', ascending=False)  # most similar first
+         search_results = search_results_df.values
+
+         return search_results
+
+     def search_image_by_image_prompt(self, image_prompt, nofimages_to_show):
+         image = self.preprocess(image_prompt).unsqueeze(0).to(self.device)
+         with torch.no_grad():
+             image_features = self.model.encode_image(image)
+
+         search_results = self.__search_image_auxiliar_func__(image_features, nofimages_to_show)
+
+         return search_results
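search_image_by_text_prompt scores images with a raw dot product between the stored feature rows and the text embedding. CLIP similarities are typically computed on L2-normalized features (cosine similarity), so scores do not depend on vector magnitudes. The sketch below shows that variant under the same data layout (names in column 0, features after); it is illustrative only and not part of the committed code.

    # Cosine-similarity variant of search_image_by_text_prompt (illustrative sketch only).
    import numpy as np

    def cosine_search(text_features, images_features_df):
        names = images_features_df.values[:, 0]
        feats = images_features_df.values[:, 1:].astype(float)        # (N, 512) image features
        feats = feats / np.linalg.norm(feats, axis=1, keepdims=True)  # L2-normalize each image row
        text = text_features / np.linalg.norm(text_features)          # L2-normalize the (1, 512) text embedding
        scores = (feats @ text.T).ravel()                             # cosine similarity per image
        order = np.argsort(scores)[::-1]                              # most similar first
        return list(zip(names[order], scores[order]))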
test_search_engine_model.py ADDED
@@ -0,0 +1,18 @@
+ import numpy as np
+ import pandas as pd
+ from search_engine_model import SearchEngineModel
+
+ def main():
+     search_engine_model = SearchEngineModel()
+     model, preprocess = search_engine_model.load_clip_model()
+     text_prompt = 'cat'
+     text_embeddings = search_engine_model.encode_text(model, text_prompt)
+
+     random_features = np.random.rand(50, 512)  # fake 512-dim features for 50 images
+     input_df = pd.DataFrame(random_features)
+     names_column = [f'image_{it}.png' for it in range(len(random_features))]
+     input_df.insert(0, 'images_names', names_column)
+
+     search_result = search_engine_model.search_image_by_text_prompt(text_embeddings, input_df)
+
+ main()
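The script above is a smoke test: it runs the text-encoding and search path end to end but discards the result. If assertions were wanted, lines like the following could be appended inside main() (hypothetical additions; the shapes follow search_engine_model.py):

    # Hypothetical checks that could be appended at the end of main().
    assert text_embeddings.shape == (1, 512)  # one ViT-B/32 text embedding
    assert search_result.shape == (50, 2)     # one [images_names, similarity] row per fake image
    print('search_engine_model smoke test passed')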