Ujjwal-32 committed on
Commit 3d3f725 · verified · 1 Parent(s): d85a321

Upload 5 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ products_clean_updated1.csv filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,6 @@
+ FROM python:3.10-slim
+ WORKDIR /app
+ COPY . .
+ RUN pip install -r requirements.txt
+ EXPOSE 7860
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,144 @@
+ from flask import Flask, request, jsonify
+ from flask_cors import CORS
+ import torch
+ import pandas as pd
+ import math
+ import os
+
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/.cache"
+ os.environ["HF_HOME"] = "/tmp/.cache"
+ os.makedirs("/tmp/.cache", exist_ok=True)
+ from sentence_transformers import SentenceTransformer, util
+
+ app = Flask(__name__)
+ CORS(app)
+
+ # Constants
+ PRODUCTS_PER_PAGE = 35
+ TOP_ECO_COUNT = 5
+ PAGE2_ECO_RATIO = 0.4
+
+ # Load model and data
+ print("🔄 Loading model and data...")
+ model = SentenceTransformer("Ujjwal-32/Product-Recommender")
+ df = pd.read_csv("products_clean_updated1.csv")
+ product_embeddings = torch.load("embeddings_updated1.pt")
+ print("✅ Model and embeddings loaded.")
+
+
+ def sanitize_product(product):
+     # Replace NaN values with None so the payload serializes to valid JSON
+     return {
+         k: (None if isinstance(v, float) and math.isnan(v) else v)
+         for k, v in product.items()
+     }
+
+
+ @app.route("/")
+ def home():
+     return "✅ GreenKart Flask Server is running!"
+
+
+ @app.route("/search", methods=["GET"])
+ def search_products():
+     query = request.args.get("query", "").strip()
+     page = int(request.args.get("page", 1))
+
+     if not query:
+         return jsonify({"error": "Missing 'query' parameter"}), 400
+
+     # Encode query and compute similarity
+     query_embedding = model.encode(query, convert_to_tensor=True)
+     cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]
+     df["similarity"] = cosine_scores.cpu().numpy()
+
+     # Sort products by similarity
+     sorted_df = df.sort_values(by="similarity", ascending=False)
+
+     # Split into eco and non-eco (compute the mask before resetting indices
+     # so the non-eco set is the exact complement of the eco set)
+     eco_mask = (sorted_df["isOrganic"] == True) & (sorted_df["sustainableScore"] >= 75)
+     eco_df = sorted_df[eco_mask].reset_index(drop=True)
+     non_eco_df = sorted_df[~eco_mask].reset_index(drop=True)
+
+     if page == 1:
+         # Page 1: 5 top eco + 18 eco + 27 non-eco (shuffled)
+         top_eco = eco_df.head(TOP_ECO_COUNT)
+         rest_eco = eco_df.iloc[TOP_ECO_COUNT : TOP_ECO_COUNT + 18]
+         rest_non_eco = non_eco_df.head(27)
+
+         mixed_rest = pd.concat([rest_eco, rest_non_eco]).sample(frac=1, random_state=42)
+         final_df = pd.concat([top_eco, mixed_rest]).reset_index(drop=True)
+     else:
+         # Page 2 and onwards
+         eco_count = int(PRODUCTS_PER_PAGE * PAGE2_ECO_RATIO)
+         non_eco_count = PRODUCTS_PER_PAGE - eco_count
+
+         eco_offset = TOP_ECO_COUNT + 18 + (page - 2) * eco_count
+         non_eco_offset = 27 + (page - 2) * non_eco_count
+
+         eco_slice = eco_df.iloc[eco_offset : eco_offset + eco_count]
+         non_eco_slice = non_eco_df.iloc[non_eco_offset : non_eco_offset + non_eco_count]
+
+         final_df = (
+             pd.concat([eco_slice, non_eco_slice])
+             .sample(frac=1, random_state=page)
+             .reset_index(drop=True)
+         )
+
+     # ✅ Convert images string to list in all cases
+     final_result = []
+     for _, row in final_df.iterrows():
+         images = []
+         if isinstance(row["images"], str):
+             images = [img.strip() for img in row["images"].split(",") if img.strip()]
+         if not images:
+             continue  # Skip if image list is empty
+         product = row.to_dict()
+         product["images"] = images
+         product = sanitize_product(product)
+         final_result.append(product)
+
+     return jsonify(final_result)
+
+
+ @app.route("/search-green", methods=["GET"])
+ def search_green_products():
+     query = request.args.get("query", "").strip()
+     page = int(request.args.get("page", 1))
+
+     if not query:
+         return jsonify({"error": "Missing 'query' parameter"}), 400
+
+     query_embedding = model.encode(query, convert_to_tensor=True)
+     cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]
+     df["similarity"] = cosine_scores.cpu().numpy()
+
+     sorted_eco_df = (
+         df[(df["isOrganic"] == True)]
+         .sort_values(by="similarity", ascending=False)
+         .reset_index(drop=True)
+     )
+
+     start = (page - 1) * PRODUCTS_PER_PAGE
+     end = start + PRODUCTS_PER_PAGE
+     page_df = sorted_eco_df.iloc[start:end]
+
+     final_result = []
+     for _, row in page_df.iterrows():
+         images = []
+         if isinstance(row["images"], str):
+             images = [img.strip() for img in row["images"].split(",") if img.strip()]
+         if not images:
+             continue  # Skip products without valid images
+         product = row.to_dict()
+         product["images"] = images
+         product = sanitize_product(product)
+         final_result.append(product)
+
+     return jsonify(final_result)
+
+
+ if __name__ == "__main__":
+     port = int(os.environ.get("PORT", 7860))
+     app.run(host="0.0.0.0", port=port, debug=True)
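For a quick local check of the two endpoints above, a request sketch like the following should work once the server is running on port 7860. The base URL and sample query are illustrative assumptions, not part of the commit:

```python
# Minimal sketch for exercising the API locally; assumes the server from app.py
# is already running at http://localhost:7860 (e.g. via `python app.py`).
import requests

BASE_URL = "http://localhost:7860"  # assumed local address, adjust as needed

# Mixed eco/non-eco results, page 1
resp = requests.get(f"{BASE_URL}/search", params={"query": "bamboo toothbrush", "page": 1})
resp.raise_for_status()
for product in resp.json()[:5]:
    print(product.get("similarity"), product.get("images", [])[:1])

# Eco-only results from /search-green
resp = requests.get(f"{BASE_URL}/search-green", params={"query": "bamboo toothbrush"})
print(len(resp.json()), "organic products returned")
```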
embeddings_updated1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31683698380bc3494c28549bf9f85a856a21213da3cf1316538c244011695590
+ size 34494077
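app.py expects embeddings_updated1.pt to line up row-for-row with products_clean_updated1.csv. A minimal sketch of how such a tensor could be regenerated with the same model is below; the text column name (a hypothetical "description") is an assumption, since the generation script is not part of this commit:

```python
# Sketch only: regenerate product embeddings aligned with the CSV rows.
# The "description" column is a hypothetical placeholder; the actual column(s)
# used to build embeddings_updated1.pt are not included in this commit.
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Ujjwal-32/Product-Recommender")
df = pd.read_csv("products_clean_updated1.csv")

texts = df["description"].fillna("").astype(str).tolist()  # assumed text column
embeddings = model.encode(texts, convert_to_tensor=True, show_progress_bar=True)

torch.save(embeddings, "embeddings_updated1.pt")  # loaded back by app.py via torch.load
```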
products_clean_updated1.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:579c81aa4b371c94401a1f14d68136550a54f8a7d998bf632e4fae44e527a100
+ size 11093626
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ Flask
+ flask-cors
+ sentence-transformers
+ torch
+ pandas
+ numpy
+ scikit-learn
+ gunicorn