Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,11 @@
|
|
|
|
|
|
1 |
from flask import Flask, request, render_template
|
2 |
import requests
|
3 |
from bs4 import BeautifulSoup
|
4 |
-
import re
|
5 |
|
6 |
app = Flask(__name__)
|
|
|
7 |
|
8 |
def extract_price(price_str):
|
9 |
"""Extract a numeric value from a price string like '₹1,187'."""
|
@@ -22,99 +24,116 @@ def index():
|
|
22 |
|
23 |
if request.method == 'POST':
|
24 |
product_name = request.form.get('product')
|
|
|
25 |
if product_name:
|
26 |
# --------- AMAZON SCRAPING -----------
|
27 |
-
amazon_url = f"https://www.amazon.in/s?k={product_name.replace(' ', '+')}
|
28 |
headers = {
|
29 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
30 |
-
|
31 |
-
|
32 |
}
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
if not title or not price or not product_url:
|
59 |
-
continue
|
60 |
-
|
61 |
-
price_val = extract_price(price)
|
62 |
-
|
63 |
-
amazon_list.append({
|
64 |
-
"title": title,
|
65 |
-
"image": image_url,
|
66 |
-
"price": price,
|
67 |
-
"url": product_url,
|
68 |
-
"price_val": price_val
|
69 |
-
})
|
70 |
-
if len(amazon_list) >= 6:
|
71 |
-
break
|
72 |
amazon_list = sorted(amazon_list, key=lambda x: x['price_val'])[:6]
|
73 |
|
74 |
# --------- FLIPKART SCRAPING -----------
|
75 |
-
flipkart_url = f"https://www.flipkart.com/search?q={product_name.replace(' ', '+')}
|
76 |
headers_flip = {
|
77 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
78 |
-
|
79 |
-
|
80 |
}
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
-
if not title or not price or not product_url:
|
102 |
-
continue
|
103 |
-
|
104 |
-
price_val = extract_price(price)
|
105 |
-
|
106 |
-
flipkart_list.append({
|
107 |
-
"title": title,
|
108 |
-
"image": image_url,
|
109 |
-
"price": price,
|
110 |
-
"url": product_url,
|
111 |
-
"price_val": price_val
|
112 |
-
})
|
113 |
-
if len(flipkart_list) >= 6:
|
114 |
-
break
|
115 |
flipkart_list = sorted(flipkart_list, key=lambda x: x['price_val'])[:6]
|
116 |
|
117 |
return render_template('index.html', amazon=amazon_list, flipkart=flipkart_list)
|
118 |
|
119 |
if __name__ == '__main__':
|
120 |
-
app.run(host='0.0.0.0', port=8080)
|
|
|
1 |
+
import logging
|
2 |
+
import re
|
3 |
from flask import Flask, request, render_template
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
|
|
6 |
|
7 |
app = Flask(__name__)
|
8 |
+
logging.basicConfig(level=logging.DEBUG)
|
9 |
|
10 |
def extract_price(price_str):
|
11 |
"""Extract a numeric value from a price string like '₹1,187'."""
|
|
|
24 |
|
25 |
if request.method == 'POST':
|
26 |
product_name = request.form.get('product')
|
27 |
+
app.logger.debug("Search initiated for product: %s", product_name)
|
28 |
if product_name:
|
29 |
# --------- AMAZON SCRAPING -----------
|
30 |
+
amazon_url = f"https://www.amazon.in/s?k={product_name.replace(' ', '+')}"
|
31 |
headers = {
|
32 |
+
"User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
33 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
34 |
+
"Chrome/90.0.4430.212 Safari/537.36")
|
35 |
}
|
36 |
+
try:
|
37 |
+
response_amazon = requests.get(amazon_url, headers=headers, timeout=10)
|
38 |
+
app.logger.debug("Amazon response status: %s", response_amazon.status_code)
|
39 |
+
if response_amazon.status_code == 200:
|
40 |
+
soup_amazon = BeautifulSoup(response_amazon.text, 'html.parser')
|
41 |
+
products = soup_amazon.find_all("div", attrs={"data-csa-c-item-id": True})
|
42 |
+
app.logger.debug("Found %d Amazon products", len(products))
|
43 |
+
for product in products:
|
44 |
+
title_tag = product.find("h2", class_="a-size-base-plus")
|
45 |
+
title = title_tag.get_text(strip=True) if title_tag else None
|
46 |
|
47 |
+
img_tag = product.find("img", class_="s-image")
|
48 |
+
image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
|
49 |
|
50 |
+
price = None
|
51 |
+
price_tag = product.find("span", class_="a-price")
|
52 |
+
if price_tag:
|
53 |
+
offscreen = price_tag.find("span", class_="a-offscreen")
|
54 |
+
if offscreen:
|
55 |
+
price = offscreen.get_text(strip=True)
|
56 |
+
|
57 |
+
product_url = None
|
58 |
+
link_tag = product.find("a", class_="a-link-normal s-line-clamp-4 s-link-style a-text-normal")
|
59 |
+
if link_tag and link_tag.has_attr("href"):
|
60 |
+
product_url = link_tag["href"]
|
61 |
+
if product_url.startswith("/"):
|
62 |
+
product_url = "https://www.amazon.in" + product_url
|
63 |
+
|
64 |
+
if not title or not price or not product_url:
|
65 |
+
continue
|
66 |
+
|
67 |
+
price_val = extract_price(price)
|
68 |
+
|
69 |
+
amazon_list.append({
|
70 |
+
"title": title,
|
71 |
+
"image": image_url,
|
72 |
+
"price": price,
|
73 |
+
"url": product_url,
|
74 |
+
"price_val": price_val
|
75 |
+
})
|
76 |
+
if len(amazon_list) >= 6:
|
77 |
+
break
|
78 |
+
else:
|
79 |
+
app.logger.debug("Failed to retrieve Amazon page")
|
80 |
+
except Exception as e:
|
81 |
+
app.logger.error("Error during Amazon scraping: %s", e)
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
amazon_list = sorted(amazon_list, key=lambda x: x['price_val'])[:6]
|
84 |
|
85 |
# --------- FLIPKART SCRAPING -----------
|
86 |
+
flipkart_url = f"https://www.flipkart.com/search?q={product_name.replace(' ', '+')}"
|
87 |
headers_flip = {
|
88 |
+
"User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
89 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
90 |
+
"Chrome/90.0.4430.212 Safari/537.36")
|
91 |
}
|
92 |
+
try:
|
93 |
+
response_flip = requests.get(flipkart_url, headers=headers_flip, timeout=10)
|
94 |
+
app.logger.debug("Flipkart response status: %s", response_flip.status_code)
|
95 |
+
if response_flip.status_code == 200:
|
96 |
+
soup_flip = BeautifulSoup(response_flip.text, 'html.parser')
|
97 |
+
flipkart_products = soup_flip.find_all("div", class_="slAVV4")
|
98 |
+
app.logger.debug("Found %d Flipkart products", len(flipkart_products))
|
99 |
+
for product in flipkart_products:
|
100 |
+
title_tag = product.find("a", class_="wjcEIp")
|
101 |
+
title = title_tag.get_text(strip=True) if title_tag else None
|
102 |
+
|
103 |
+
product_url = None
|
104 |
+
if title_tag and title_tag.has_attr("href"):
|
105 |
+
product_url = title_tag["href"]
|
106 |
+
if product_url.startswith("/"):
|
107 |
+
product_url = "https://www.flipkart.com" + product_url
|
108 |
+
|
109 |
+
img_tag = product.find("img", class_="DByuf4")
|
110 |
+
image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
|
111 |
+
|
112 |
+
price_tag = product.find("div", class_="Nx9bqj")
|
113 |
+
price = price_tag.get_text(strip=True) if price_tag else None
|
114 |
+
|
115 |
+
if not title or not price or not product_url:
|
116 |
+
continue
|
117 |
+
|
118 |
+
price_val = extract_price(price)
|
119 |
+
|
120 |
+
flipkart_list.append({
|
121 |
+
"title": title,
|
122 |
+
"image": image_url,
|
123 |
+
"price": price,
|
124 |
+
"url": product_url,
|
125 |
+
"price_val": price_val
|
126 |
+
})
|
127 |
+
if len(flipkart_list) >= 6:
|
128 |
+
break
|
129 |
+
else:
|
130 |
+
app.logger.debug("Failed to retrieve Flipkart page")
|
131 |
+
except Exception as e:
|
132 |
+
app.logger.error("Error during Flipkart scraping: %s", e)
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
flipkart_list = sorted(flipkart_list, key=lambda x: x['price_val'])[:6]
|
135 |
|
136 |
return render_template('index.html', amazon=amazon_list, flipkart=flipkart_list)
|
137 |
|
138 |
if __name__ == '__main__':
    # Script entry point: start Flask's built-in server on all interfaces
    # (0.0.0.0), port 8080.
    #
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the port execute arbitrary Python. Because this server
    # binds to every interface, debug mode must never be hard-coded on; it is
    # opt-in through the FLASK_DEBUG environment variable instead.
    import os

    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(host='0.0.0.0', port=8080, debug=debug_enabled)
|