RushiMane2003 committed on
Commit
01dea42
·
verified ·
1 Parent(s): d823940

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -79
app.py CHANGED
@@ -1,9 +1,11 @@
 
 
1
  from flask import Flask, request, render_template
2
  import requests
3
  from bs4 import BeautifulSoup
4
- import re
5
 
6
  app = Flask(__name__)
 
7
 
8
  def extract_price(price_str):
9
  """Extract a numeric value from a price string like '₹1,187'."""
@@ -22,99 +24,116 @@ def index():
22
 
23
  if request.method == 'POST':
24
  product_name = request.form.get('product')
 
25
  if product_name:
26
  # --------- AMAZON SCRAPING -----------
27
- amazon_url = f"https://www.amazon.in/s?k={product_name.replace(' ', '+')}+for+farms"
28
  headers = {
29
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
30
- "AppleWebKit/537.36 (KHTML, like Gecko) "
31
- "Chrome/90.0.4430.212 Safari/537.36"
32
  }
33
- response_amazon = requests.get(amazon_url, headers=headers)
34
- if response_amazon.status_code == 200:
35
- soup_amazon = BeautifulSoup(response_amazon.text, 'html.parser')
36
- products = soup_amazon.find_all("div", attrs={"data-csa-c-item-id": True})
37
- for product in products:
38
- title_tag = product.find("h2", class_="a-size-base-plus")
39
- title = title_tag.get_text(strip=True) if title_tag else None
 
 
 
40
 
41
- img_tag = product.find("img", class_="s-image")
42
- image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
43
 
44
- price = None
45
- price_tag = product.find("span", class_="a-price")
46
- if price_tag:
47
- offscreen = price_tag.find("span", class_="a-offscreen")
48
- if offscreen:
49
- price = offscreen.get_text(strip=True)
50
-
51
- product_url = None
52
- link_tag = product.find("a", class_="a-link-normal s-line-clamp-4 s-link-style a-text-normal")
53
- if link_tag and link_tag.has_attr("href"):
54
- product_url = link_tag["href"]
55
- if product_url.startswith("/"):
56
- product_url = "https://www.amazon.in" + product_url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- if not title or not price or not product_url:
59
- continue
60
-
61
- price_val = extract_price(price)
62
-
63
- amazon_list.append({
64
- "title": title,
65
- "image": image_url,
66
- "price": price,
67
- "url": product_url,
68
- "price_val": price_val
69
- })
70
- if len(amazon_list) >= 6:
71
- break
72
  amazon_list = sorted(amazon_list, key=lambda x: x['price_val'])[:6]
73
 
74
  # --------- FLIPKART SCRAPING -----------
75
- flipkart_url = f"https://www.flipkart.com/search?q={product_name.replace(' ', '+')}+for+farms"
76
  headers_flip = {
77
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
78
- "AppleWebKit/537.36 (KHTML, like Gecko) "
79
- "Chrome/90.0.4430.212 Safari/537.36"
80
  }
81
- response_flip = requests.get(flipkart_url, headers=headers_flip)
82
- if response_flip.status_code == 200:
83
- soup_flip = BeautifulSoup(response_flip.text, 'html.parser')
84
- flipkart_products = soup_flip.find_all("div", class_="slAVV4")
85
- for product in flipkart_products:
86
- title_tag = product.find("a", class_="wjcEIp")
87
- title = title_tag.get_text(strip=True) if title_tag else None
88
-
89
- product_url = None
90
- if title_tag and title_tag.has_attr("href"):
91
- product_url = title_tag["href"]
92
- if product_url.startswith("/"):
93
- product_url = "https://www.flipkart.com" + product_url
94
-
95
- img_tag = product.find("img", class_="DByuf4")
96
- image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
97
-
98
- price_tag = product.find("div", class_="Nx9bqj")
99
- price = price_tag.get_text(strip=True) if price_tag else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- if not title or not price or not product_url:
102
- continue
103
-
104
- price_val = extract_price(price)
105
-
106
- flipkart_list.append({
107
- "title": title,
108
- "image": image_url,
109
- "price": price,
110
- "url": product_url,
111
- "price_val": price_val
112
- })
113
- if len(flipkart_list) >= 6:
114
- break
115
  flipkart_list = sorted(flipkart_list, key=lambda x: x['price_val'])[:6]
116
 
117
  return render_template('index.html', amazon=amazon_list, flipkart=flipkart_list)
118
 
119
  if __name__ == '__main__':
120
- app.run(host='0.0.0.0', port=8080)
 
1
+ import logging
2
+ import re
3
  from flask import Flask, request, render_template
4
  import requests
5
  from bs4 import BeautifulSoup
 
6
 
7
  app = Flask(__name__)
8
+ logging.basicConfig(level=logging.DEBUG)
9
 
10
  def extract_price(price_str):
11
  """Extract a numeric value from a price string like '₹1,187'."""
 
24
 
25
  if request.method == 'POST':
26
  product_name = request.form.get('product')
27
+ app.logger.debug("Search initiated for product: %s", product_name)
28
  if product_name:
29
  # --------- AMAZON SCRAPING -----------
30
+ amazon_url = f"https://www.amazon.in/s?k={product_name.replace(' ', '+')}"
31
  headers = {
32
+ "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
33
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
34
+ "Chrome/90.0.4430.212 Safari/537.36")
35
  }
36
+ try:
37
+ response_amazon = requests.get(amazon_url, headers=headers, timeout=10)
38
+ app.logger.debug("Amazon response status: %s", response_amazon.status_code)
39
+ if response_amazon.status_code == 200:
40
+ soup_amazon = BeautifulSoup(response_amazon.text, 'html.parser')
41
+ products = soup_amazon.find_all("div", attrs={"data-csa-c-item-id": True})
42
+ app.logger.debug("Found %d Amazon products", len(products))
43
+ for product in products:
44
+ title_tag = product.find("h2", class_="a-size-base-plus")
45
+ title = title_tag.get_text(strip=True) if title_tag else None
46
 
47
+ img_tag = product.find("img", class_="s-image")
48
+ image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
49
 
50
+ price = None
51
+ price_tag = product.find("span", class_="a-price")
52
+ if price_tag:
53
+ offscreen = price_tag.find("span", class_="a-offscreen")
54
+ if offscreen:
55
+ price = offscreen.get_text(strip=True)
56
+
57
+ product_url = None
58
+ link_tag = product.find("a", class_="a-link-normal s-line-clamp-4 s-link-style a-text-normal")
59
+ if link_tag and link_tag.has_attr("href"):
60
+ product_url = link_tag["href"]
61
+ if product_url.startswith("/"):
62
+ product_url = "https://www.amazon.in" + product_url
63
+
64
+ if not title or not price or not product_url:
65
+ continue
66
+
67
+ price_val = extract_price(price)
68
+
69
+ amazon_list.append({
70
+ "title": title,
71
+ "image": image_url,
72
+ "price": price,
73
+ "url": product_url,
74
+ "price_val": price_val
75
+ })
76
+ if len(amazon_list) >= 6:
77
+ break
78
+ else:
79
+ app.logger.debug("Failed to retrieve Amazon page")
80
+ except Exception as e:
81
+ app.logger.error("Error during Amazon scraping: %s", e)
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  amazon_list = sorted(amazon_list, key=lambda x: x['price_val'])[:6]
84
 
85
  # --------- FLIPKART SCRAPING -----------
86
+ flipkart_url = f"https://www.flipkart.com/search?q={product_name.replace(' ', '+')}"
87
  headers_flip = {
88
+ "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
89
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
90
+ "Chrome/90.0.4430.212 Safari/537.36")
91
  }
92
+ try:
93
+ response_flip = requests.get(flipkart_url, headers=headers_flip, timeout=10)
94
+ app.logger.debug("Flipkart response status: %s", response_flip.status_code)
95
+ if response_flip.status_code == 200:
96
+ soup_flip = BeautifulSoup(response_flip.text, 'html.parser')
97
+ flipkart_products = soup_flip.find_all("div", class_="slAVV4")
98
+ app.logger.debug("Found %d Flipkart products", len(flipkart_products))
99
+ for product in flipkart_products:
100
+ title_tag = product.find("a", class_="wjcEIp")
101
+ title = title_tag.get_text(strip=True) if title_tag else None
102
+
103
+ product_url = None
104
+ if title_tag and title_tag.has_attr("href"):
105
+ product_url = title_tag["href"]
106
+ if product_url.startswith("/"):
107
+ product_url = "https://www.flipkart.com" + product_url
108
+
109
+ img_tag = product.find("img", class_="DByuf4")
110
+ image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
111
+
112
+ price_tag = product.find("div", class_="Nx9bqj")
113
+ price = price_tag.get_text(strip=True) if price_tag else None
114
+
115
+ if not title or not price or not product_url:
116
+ continue
117
+
118
+ price_val = extract_price(price)
119
+
120
+ flipkart_list.append({
121
+ "title": title,
122
+ "image": image_url,
123
+ "price": price,
124
+ "url": product_url,
125
+ "price_val": price_val
126
+ })
127
+ if len(flipkart_list) >= 6:
128
+ break
129
+ else:
130
+ app.logger.debug("Failed to retrieve Flipkart page")
131
+ except Exception as e:
132
+ app.logger.error("Error during Flipkart scraping: %s", e)
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  flipkart_list = sorted(flipkart_list, key=lambda x: x['price_val'])[:6]
135
 
136
  return render_template('index.html', amazon=amazon_list, flipkart=flipkart_list)
137
 
138
  if __name__ == '__main__':
139
+ app.run(host='0.0.0.0', port=8080, debug=True)