Upload 5 files
- .gitattributes +1 -0
- Dockerfile +6 -0
- app.py +144 -0
- embeddings_updated1.pt +3 -0
- products_clean_updated1.csv +3 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+products_clean_updated1.csv filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,6 @@
+FROM python:3.10-slim
+WORKDIR /app
+COPY . .
+RUN pip install -r requirements.txt
+EXPOSE 7860
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
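The image serves the Flask app with gunicorn on port 7860, the port Hugging Face Docker Spaces expect the app to listen on. A minimal smoke test once a container is running locally, assuming it is mapped to the same host port (the requests client and URL are illustrative, not part of this commit):

    import requests

    # Hit the health route defined in app.py; expects HTTP 200 and the greeting text
    resp = requests.get("http://localhost:7860/")
    print(resp.status_code, resp.text)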
app.py
ADDED
@@ -0,0 +1,144 @@
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import torch
+import pandas as pd
+import math
+import os
+
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/.cache"
+os.environ["HF_HOME"] = "/tmp/.cache"
+os.makedirs("/tmp/.cache", exist_ok=True)
+from sentence_transformers import SentenceTransformer, util
+
+app = Flask(__name__)
+CORS(app)
+
+# Constants
+PRODUCTS_PER_PAGE = 35
+TOP_ECO_COUNT = 5
+PAGE2_ECO_RATIO = 0.4
+
+# Load model and data
+print("🚀 Loading model and data...")
+model = SentenceTransformer("Ujjwal-32/Product-Recommender")
+df = pd.read_csv("products_clean_updated1.csv")
+product_embeddings = torch.load("embeddings_updated1.pt")
+print("✅ Model and embeddings loaded.")
+
+
+def sanitize_product(product):
+    return {
+        k: (None if isinstance(v, float) and math.isnan(v) else v)
+        for k, v in product.items()
+    }
+
+
+@app.route("/")
+def home():
+    return "✅ GreenKart Flask Server is running!"
+
+
+@app.route("/search", methods=["GET"])
+def search_products():
+    query = request.args.get("query", "").strip()
+    page = int(request.args.get("page", 1))
+
+    if not query:
+        return jsonify({"error": "Missing 'query' parameter"}), 400
+
+    # Encode query and compute similarity
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]
+    df["similarity"] = cosine_scores.cpu().numpy()
+
+    # Sort products by similarity
+    sorted_df = df.sort_values(by="similarity", ascending=False)
+
+    # Split into eco and non-eco
+    eco_mask = (sorted_df["isOrganic"] == True) & (sorted_df["sustainableScore"] >= 75)
+    eco_df = sorted_df[eco_mask].reset_index(drop=True)
+    # Exclude eco rows via the shared mask; index-based exclusion would misfire after reset_index
+    non_eco_df = sorted_df[~eco_mask].reset_index(drop=True)
+
+    if page == 1:
+        # Page 1: 5 top eco + 18 eco + 27 non-eco (shuffled)
+        top_eco = eco_df.head(TOP_ECO_COUNT)
+        rest_eco = eco_df.iloc[TOP_ECO_COUNT : TOP_ECO_COUNT + 18]
+        rest_non_eco = non_eco_df.head(27)
+
+        mixed_rest = pd.concat([rest_eco, rest_non_eco]).sample(frac=1, random_state=42)
+        final_df = pd.concat([top_eco, mixed_rest]).reset_index(drop=True)
+    else:
+        # Page 2 and onwards
+        eco_count = int(PRODUCTS_PER_PAGE * PAGE2_ECO_RATIO)
+        non_eco_count = PRODUCTS_PER_PAGE - eco_count
+
+        eco_offset = TOP_ECO_COUNT + 18 + (page - 2) * eco_count
+        non_eco_offset = 27 + (page - 2) * non_eco_count
+
+        eco_slice = eco_df.iloc[eco_offset : eco_offset + eco_count]
+        non_eco_slice = non_eco_df.iloc[non_eco_offset : non_eco_offset + non_eco_count]
+
+        final_df = (
+            pd.concat([eco_slice, non_eco_slice])
+            .sample(frac=1, random_state=page)
+            .reset_index(drop=True)
+        )
+
+    # ✅ Convert images string to list in all cases
+    final_result = []
+    for _, row in final_df.iterrows():
+        images = []
+        if isinstance(row["images"], str):
+            images = [img.strip() for img in row["images"].split(",") if img.strip()]
+        if not images:
+            continue  # Skip if image list is empty
+        product = row.to_dict()
+        product["images"] = images
+        product = sanitize_product(product)
+        final_result.append(product)
+
+    return jsonify(final_result)
+
+
+@app.route("/search-green", methods=["GET"])
+def search_green_products():
+    query = request.args.get("query", "").strip()
+    page = int(request.args.get("page", 1))
+
+    if not query:
+        return jsonify({"error": "Missing 'query' parameter"}), 400
+
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]
+    df["similarity"] = cosine_scores.cpu().numpy()
+
+    sorted_eco_df = (
+        df[(df["isOrganic"] == True)]
+        .sort_values(by="similarity", ascending=False)
+        .reset_index(drop=True)
+    )
+
+    start = (page - 1) * PRODUCTS_PER_PAGE
+    end = start + PRODUCTS_PER_PAGE
+    page_df = sorted_eco_df.iloc[start:end]
+
+    final_result = []
+    for _, row in page_df.iterrows():
+        images = []
+        if isinstance(row["images"], str):
+            images = [img.strip() for img in row["images"].split(",") if img.strip()]
+        if not images:
+            continue  # Skip products without valid images
+        product = row.to_dict()
+        product["images"] = images
+        product = sanitize_product(product)
+        final_result.append(product)
+
+    return jsonify(final_result)
+
+
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))
+    app.run(host="0.0.0.0", port=port, debug=True)
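/search interleaves eco and non-eco matches page by page, while /search-green returns only organic products in plain similarity order. A sketch of client calls against a local run (the query value is just an example, and requests is not part of this commit):

    import requests

    BASE = "http://localhost:7860"

    # Page 1 of mixed results: the top 5 eco products lead, the rest are shuffled
    mixed = requests.get(f"{BASE}/search", params={"query": "bamboo toothbrush", "page": 1}).json()

    # Eco-only results, ranked purely by similarity, 35 per page
    green = requests.get(f"{BASE}/search-green", params={"query": "bamboo toothbrush", "page": 1}).json()

    print(len(mixed), len(green))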
embeddings_updated1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31683698380bc3494c28549bf9f85a856a21213da3cf1316538c244011695590
+size 34494077
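This LFS pointer holds the precomputed product embeddings that app.py loads with torch.load, so only the query is encoded at request time. The commit does not show how the tensor was produced; a plausible regeneration sketch, where the "description" column is a guess at the text field actually embedded:

    import pandas as pd
    import torch
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("Ujjwal-32/Product-Recommender")
    df = pd.read_csv("products_clean_updated1.csv")

    # "description" is hypothetical; the real text column(s) are not in this commit
    texts = df["description"].fillna("").tolist()
    embeddings = model.encode(texts, convert_to_tensor=True, show_progress_bar=True)
    torch.save(embeddings.cpu(), "embeddings_updated1.pt")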
products_clean_updated1.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:579c81aa4b371c94401a1f14d68136550a54f8a7d998bf632e4fae44e527a100
+size 11093626
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+Flask
+flask-cors
+sentence-transformers
+torch
+pandas
+numpy
+scikit-learn
+gunicorn
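All eight dependencies are unpinned, so each Space rebuild installs the latest releases and can break silently (unpinned torch on a slim image also pulls the large default wheel). A pinned variant would freeze a tested combination; the version numbers below are illustrative, not the ones this Space was built with:

    Flask==3.0.3
    flask-cors==4.0.1
    sentence-transformers==2.7.0
    torch==2.3.0
    pandas==2.2.2
    numpy==1.26.4
    scikit-learn==1.4.2
    gunicorn==22.0.0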