Commit 08614a1
Parent(s): 6b276dc

First upload of the code to the repo of CLIP_Text_Embeddings.

Files changed:
- Dockerfile +26 -0
- README.md +5 -5
- app.py +51 -0
- config.cfg +3 -0
- requirements.txt +3 -0
- search_engine_model.py +66 -0
- test_search_engine_model.py +18 -0
Dockerfile
ADDED
@@ -0,0 +1,26 @@
+# Use an official Python runtime as the base image
+FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime AS base
+
+# It is necessary to install git to run the pip install -r requirements.txt
+RUN apt-get update && apt-get install -y git
+
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+# Set the working directory in the container
+WORKDIR $HOME/app
+
+# Copy the current directory contents into the container at $HOME/app
+COPY --chown=user . $HOME/app
+
+RUN pip install -r requirements.txt
+
+FROM base AS debug
+
+CMD ["python", "-m", "pdb", "test_search_engine_model.py"]
+
+FROM base AS run
+
+CMD ["python", "app.py"]
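The two trailing FROM stages make this a multi-stage build with separate debug and run targets. As a usage sketch (the image tag names below are placeholders, not part of this commit), a target is selected at build time with docker build's --target flag:

# run target: serve the Gradio app (port 7860 matches config.cfg)
docker build --target run -t clip-embeddings .
docker run -p 7860:7860 clip-embeddings

# debug target: step through the test script under pdb
docker build --target debug -t clip-embeddings-debug .
docker run -it clip-embeddings-debug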
README.md
CHANGED
@@ -1,11 +1,11 @@
 ---
-title: CLIP
-emoji:
-colorFrom:
-colorTo:
+title: CLIP Embeddings
+emoji: 👀
+colorFrom: pink
+colorTo: yellow
 sdk: docker
 pinned: false
-short_description:
+short_description: Image embeddings extractor using the OpenAI CLIP model
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,51 @@
+import configparser
+import gradio as gr
+import numpy as np
+import pandas as pd
+from search_engine_model import SearchEngineModel
+
+def get_text_embeddings(text_prompt, input_np_array):
+    search_engine_model = SearchEngineModel()
+
+    model, _ = search_engine_model.load_clip_model()
+    text_embeddings = search_engine_model.encode_text(model, text_prompt)
+
+    input_df = pd.DataFrame(input_np_array)
+    search_result = search_engine_model.search_image_by_text_prompt(text_embeddings, input_df)
+
+    return text_embeddings, search_result
+
+def main():
+    config_manager_obj = configparser.ConfigParser()
+    config_manager_obj.read('./config.cfg')
+
+    random_features = np.random.rand(50, 512)
+    initial_dataframe = pd.DataFrame(random_features)
+    names_column = [f'image_{it}.png' for it in range(0, len(random_features))]
+    initial_dataframe.insert(0, 'images_names', names_column)
+
+    main_app = gr.Interface(
+        fn=get_text_embeddings,
+        inputs=[
+            gr.Textbox(),
+            gr.Dataframe(
+                initial_dataframe.values,
+                headers=["image_name"] + [f'feature_{it}' for it in range(0, random_features.shape[1])],
+                type='numpy',
+                interactive=False
+            )
+        ],
+        outputs=[
+            gr.Dataframe(type='numpy', headers=[f'feature_{it}' for it in range(0, random_features.shape[1])]),
+            gr.Dataframe(type='numpy', headers=['image_name', 'similarity'])
+        ],
+        title="CLIP Text Embeddings",
+        description="Obtain the embeddings of a given text and use the API to compare with a set of images' embeddings.",
+        flagging_mode="never"
+    )
+
+    HOST_IP_ADDRESS = config_manager_obj['SERVER']['HOST_IP_ADDRESS']
+    PORT_NUMBER = int(config_manager_obj['SERVER']['PORT_NUMBER'])
+    main_app.launch(server_name=HOST_IP_ADDRESS, server_port=PORT_NUMBER)
+
+main()
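For intuition, the comparison app.py exposes is just a matrix product between one text embedding and the stacked image features. A minimal standalone sketch with random stand-in data, mirroring the shapes above (not part of the commit):

import numpy as np

text_embedding = np.random.rand(1, 512)    # one CLIP text vector, shape (1, 512)
image_features = np.random.rand(50, 512)   # 50 stand-in image vectors, shape (50, 512)

# (50, 512) @ (512, 1) -> (50, 1): one raw similarity score per image
scores = image_features @ text_embedding.T
best = int(scores.argmax())
print(f'image_{best}.png', float(scores[best]))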
config.cfg
ADDED
@@ -0,0 +1,3 @@
+[SERVER]
+HOST_IP_ADDRESS = 0.0.0.0
+PORT_NUMBER = 7860
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+gradio==5.12.0
+pandas==2.2.3
+git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
search_engine_model.py
ADDED
@@ -0,0 +1,66 @@
+import clip
+import logging
+import os
+import pandas as pd
+from PIL import Image
+import random
+import torch
+
+class SearchEngineModel():
+
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        logging.basicConfig(level=logging.INFO)
+
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model, self.preprocess = self.load_clip_model()
+
+    def load_clip_model(self):
+        model, preprocess = clip.load("ViT-B/32", device=self.device)
+
+        return model, preprocess
+
+    def read_image(self, image_path):
+        pil_image = Image.open(image_path)
+
+        return pil_image
+
+    def encode_text(self, model, text_prompt):
+        query = clip.tokenize([text_prompt]).to(self.device)
+        with torch.no_grad():
+            text_features = self.model.encode_text(query)
+            text_features = text_features.cpu().numpy()  # move off the GPU first; .numpy() fails on CUDA tensors
+
+        return text_features
+
+    def __search_image_auxiliar_func__(self, prompt_features, nofimages_to_show):
+        encoded_images, image_paths = self.encode_images(self.model, self.preprocess, self.image_root_dir, self.csv_file_path)  # NOTE: encode_images, image_root_dir and csv_file_path are not defined in this commit
+        similarity = encoded_images @ prompt_features.T
+        values, indices = similarity.topk(nofimages_to_show, dim=0)
+
+        results = []
+        for value, index in zip(values, indices):
+            results.append(image_paths[index])
+
+        return results
+
+    def search_image_by_text_prompt(self, text_features, images_features):
+        names_column = images_features.values[:, 0]
+        search_results = images_features.values[:, 1:].astype(float) @ text_features.T
+
+        search_results_df = pd.DataFrame(search_results)
+        search_results_df.insert(0, "images_names", names_column)
+        search_results_df.columns = ['images_names', 'similarity']
+        search_results_df = search_results_df.sort_values(by='similarity', ascending=False)  # best matches first
+        search_results = search_results_df.values
+
+        return search_results
+
+    def search_image_by_image_prompt(self, image_prompt, nofimages_to_show):
+        image = self.preprocess(image_prompt).unsqueeze(0).to(self.device)
+        with torch.no_grad():
+            image_features = self.model.encode_image(image)
+
+        search_results = self.__search_image_auxiliar_func__(image_features, nofimages_to_show)
+
+        return search_results
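One caveat worth noting: search_image_by_text_prompt takes a raw dot product, so images with larger-magnitude feature vectors score higher regardless of direction. CLIP similarity is conventionally computed on L2-normalized embeddings, i.e. cosine similarity. A minimal sketch of that convention (not part of this commit):

import numpy as np

def cosine_similarity(text_features, image_features):
    # L2-normalize each vector, then the dot product is the cosine similarity
    text = text_features / np.linalg.norm(text_features, axis=-1, keepdims=True)
    images = image_features / np.linalg.norm(image_features, axis=-1, keepdims=True)
    return images @ text.T  # shape: (n_images, 1)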
test_search_engine_model.py
ADDED
@@ -0,0 +1,18 @@
+import numpy as np
+import pandas as pd
+from search_engine_model import SearchEngineModel
+
+def main():
+    search_engine_model = SearchEngineModel()
+    model, preprocess = search_engine_model.load_clip_model()
+    text_prompt = 'cat'
+    text_embeddings = search_engine_model.encode_text(model, text_prompt)
+
+    random_features = np.random.rand(50, 512)
+    input_df = pd.DataFrame(random_features)
+    names_column = [f'image_{it}.png' for it in range(0, len(random_features))]
+    input_df.insert(0, 'images_names', names_column)
+
+    search_result = search_engine_model.search_image_by_text_prompt(text_embeddings, input_df)
+
+main()
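As written, the script only exercises the pipeline end to end and asserts nothing. A hypothetical pytest-style check on the result shape (function name and expectations are illustrative, not part of this commit) could look like:

import numpy as np
import pandas as pd
from search_engine_model import SearchEngineModel

def test_search_returns_one_score_per_image():
    engine = SearchEngineModel()
    model, _ = engine.load_clip_model()
    text_embeddings = engine.encode_text(model, 'cat')

    features = np.random.rand(50, 512)
    input_df = pd.DataFrame(features)
    input_df.insert(0, 'images_names', [f'image_{it}.png' for it in range(len(features))])

    result = engine.search_image_by_text_prompt(text_embeddings, input_df)
    assert result.shape == (50, 2)  # one (name, similarity) row per input image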