JoJosmin committed on
Commit
3c76019
·
verified ·
1 Parent(s): 80c6ad4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -7
app.py CHANGED
@@ -129,15 +129,53 @@ def segment_clothing(img, clothes=["Hat", "Upper-clothes", "Skirt", "Pants", "Dr
129
 
130
  # return structured_results
131
 
132
- def find_similar_images(query_embedding, faiss_index, all_metadatas, top_k=5):
133
- query_embedding = query_embedding.astype('float32').reshape(1, -1) # 차원 조정 및 형변환
134
- _, indices = faiss_index.search(query_embedding, top_k) # 유사한 벡터의 인덱스를 반환
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- # 인덱스를 이용해 메타데이터와 유사도 가져오기
137
  structured_results = []
138
- for idx in indices[0]:
139
- metadata = all_metadatas[idx]
140
- similarity = cosine_similarity(query_embedding, faiss_index.reconstruct(idx).reshape(1, -1))[0][0]
141
  structured_results.append({
142
  'info': metadata,
143
  'similarity': similarity
 
129
 
130
  # return structured_results
131
 
132
+ #def find_similar_images(query_embedding, collection, top_k=5):
133
+ # # ChromaDB 쿼리
134
+ # results = collection.query(
135
+ # query_embeddings=query_embedding.reshape(1, -1), # 2D 배열로 변환
136
+ # n_results=top_k,
137
+ # include=['metadatas', 'embeddings'] # 메타데이터와 임베딩 포함
138
+ # )
139
+ #
140
+ # # 메타데이터와 임베딩 추출
141
+ # top_metadatas = results['metadatas'][0]
142
+ # top_embeddings = np.array(results['embeddings'][0]) # numpy 배열로 변환
143
+
144
+ # # 코사인 유사도 계산
145
+ # query_embedding_normalized = normalize(query_embedding.reshape(1, -1), axis=1)
146
+ # top_embeddings_normalized = normalize(top_embeddings, axis=1)
147
+ # similarities = cosine_similarity(query_embedding_normalized, top_embeddings_normalized).flatten()
148
+
149
+ # structured_results = []
150
+ # for metadata, similarity in zip(top_metadatas, similarities):
151
+ # structured_results.append({
152
+ # 'info': metadata,
153
+ # 'similarity': similarity
154
+ # })
155
+ #
156
+ # return structured_results
157
+
158
+
159
+ def find_similar_images(query_embedding, collection, top_k=5):
160
+ # ChromaDB에서 임베딩과 메타데이터를 가져옵니다.
161
+ all_data = collection.get(include=['embeddings', 'metadatas'])
162
+ all_embeddings = np.array(all_data['embeddings']).astype('float32') # faiss는 float32 필요
163
+ all_metadatas = all_data['metadatas']
164
+
165
+ # L2-normalize the embeddings and build an inner-product (IP) faiss index; IP on unit vectors equals cosine similarity
166
+ faiss.normalize_L2(all_embeddings) # L2 정규화
167
+ index = faiss.IndexFlatIP(all_embeddings.shape[1]) # IP는 Inner Product(코사인 유사도)
168
+ index.add(all_embeddings)
169
+
170
+ # 쿼리 임베딩 정규화 후 faiss 검색
171
+ query_embedding = query_embedding.reshape(1, -1).astype('float32')
172
+ faiss.normalize_L2(query_embedding)
173
+ _, indices = index.search(query_embedding, top_k)
174
 
175
+ # 검색된 상위 결과를 반환
176
  structured_results = []
177
+ for metadata, idx in zip(all_metadatas, indices[0]):
178
+ similarity = np.dot(query_embedding, all_embeddings[idx]) # 코사인 유사도 계산
 
179
  structured_results.append({
180
  'info': metadata,
181
  'similarity': similarity