from flask import Flask, request, render_template
import requests
from bs4 import BeautifulSoup
import re
import random
import time

app = Flask(__name__)

# List of user agents to rotate through
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_5_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55",
]


def get_random_user_agent():
    return random.choice(USER_AGENTS)


def extract_price(price_str):
    """Extract a numeric value from a price string like '₹1,187'."""
    if price_str:
        numeric_str = re.sub(r'[^\d.]', '', price_str)
        try:
            return float(numeric_str)
        except ValueError:
            return float('inf')
    return float('inf')
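
# Illustrative behavior of extract_price (example values, not a test suite):
#   extract_price("₹1,187")  -> 1187.0   (currency symbol and comma stripped)
#   extract_price("")        -> inf      (falsy input)
#   extract_price("N/A")     -> inf      (nothing numeric survives the regex)
# Returning inf means items without a parseable price sort to the end of the
# price-sorted lists built below.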

def get_with_retry(url, max_retries=3):
    """Make a GET request with retries and random delays."""
    headers = {
        "User-Agent": get_random_user_agent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
    }

    response = None  # keeps the final return valid even if every attempt raises
    for attempt in range(max_retries):
        try:
            # Add a random delay before each retry (0.5 to 2.5 seconds)
            if attempt > 0:
                time.sleep(0.5 + random.random() * 2)

            response = requests.get(url, headers=headers, timeout=10)

            # If successful, return the response
            if response.status_code == 200:
                return response

            # If we hit a rate limit or block, wait longer before retrying
            if response.status_code in (429, 503, 529):
                time.sleep(2 + random.random() * 3)
        except requests.RequestException as e:
            print(f"Request error on attempt {attempt + 1}: {e}")

    # Return the last response even if it wasn't successful; None means no
    # attempt produced a response at all
    return response


def get_mock_amazon_data(query):
    """Generate mock Amazon data when scraping fails."""
    base_price = random.randint(500, 5000)
    products = []
    for i in range(6):
        price_variation = random.uniform(0.8, 1.2)
        price = base_price * price_variation
        products.append({
            "title": f"{query.title()} - Professional Farm Grade (Model A{i + 1})",
            "image": f"/api/placeholder/{200 + i * 10}/{200 + i * 10}",
            "price": f"₹{int(price)}",
            "url": "#",
            "price_val": price,
        })
    return sorted(products, key=lambda x: x['price_val'])


def get_mock_flipkart_data(query):
    """Generate mock Flipkart data when scraping fails."""
    base_price = random.randint(450, 4800)
    products = []
    for i in range(6):
        price_variation = random.uniform(0.85, 1.25)
        price = base_price * price_variation
        products.append({
            "title": f"Premium {query.title()} for Agricultural Use - Durable Steel Construction",
            "image": f"/api/placeholder/{200 + i * 10}/{200 + i * 10}",
            "price": f"₹{int(price)}",
            "url": "#",
            "price_val": price,
        })
    return sorted(products, key=lambda x: x['price_val'])


@app.route('/', methods=['GET', 'POST'])
def index():
    amazon_list = []
    flipkart_list = []
    amazon_page_data = ""
    flipkart_page_data = ""
    search_attempted = False
    product_name = ""

    if request.method == 'POST':
        product_name = request.form.get('product')
        search_attempted = True

        if product_name:
            # --------- AMAZON SCRAPING -----------
            try:
                amazon_url = f"https://www.amazon.in/s?k={product_name.replace(' ', '+')}"
                response_amazon = get_with_retry(amazon_url)

                if response_amazon is not None and response_amazon.status_code == 200:
                    # Save full page data to amazon.txt
                    with open("amazon.txt", "w", encoding="utf-8") as f:
                        f.write(response_amazon.text)
                    amazon_page_data = response_amazon.text

                    soup_amazon = BeautifulSoup(response_amazon.text, 'html.parser')

                    # Try different selectors that Amazon might be using
                    products = soup_amazon.find_all("div", attrs={"data-component-type": "s-search-result"})
                    if not products:
                        products = soup_amazon.find_all("div", class_="s-result-item")

                    for product in products:
                        title_tag = product.select_one("h2 a span") or product.select_one("h2 span")
                        title = title_tag.get_text(strip=True) if title_tag else None

                        img_tag = product.select_one("img.s-image")
                        image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""

                        price = None
                        price_tag = product.select_one("span.a-price .a-offscreen")
                        if price_tag:
                            price = price_tag.get_text(strip=True)

                        product_url = None
                        link_tag = product.select_one("h2 a")
                        if link_tag and link_tag.has_attr("href"):
                            product_url = link_tag["href"]
                            if product_url.startswith("/"):
                                product_url = "https://www.amazon.in" + product_url

                        # Skip listings missing a title, price, or link
                        if not title or not price or not product_url:
                            continue

                        price_val = extract_price(price)
                        amazon_list.append({
                            "title": title,
                            "image": image_url,
                            "price": price,
                            "url": product_url,
                            "price_val": price_val,
                        })

                        if len(amazon_list) >= 6:
                            break
                else:
                    status = response_amazon.status_code if response_amazon is not None else "no response"
                    print(f"Amazon scraping failed with status code: {status}")
            except Exception as e:
                print(f"Error during Amazon scraping: {e}")

            # If we couldn't get real data, use mock data
            if not amazon_list:
                amazon_list = get_mock_amazon_data(product_name)
            else:
                amazon_list = sorted(amazon_list, key=lambda x: x['price_val'])[:6]
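
            # NOTE: Flipkart's CSS class names (e.g. _4rR01T, _30jeq3) are
            # build-generated and tend to rotate over time, so the selectors
            # below are best-effort and may need refreshing when the markup
            # changes.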
            # --------- FLIPKART SCRAPING -----------
            try:
                flipkart_url = f"https://www.flipkart.com/search?q={product_name.replace(' ', '+')}"
                response_flip = get_with_retry(flipkart_url)

                if response_flip is not None and response_flip.status_code == 200:
                    # Save full page data to flipkart.txt
                    with open("flipkart.txt", "w", encoding="utf-8") as f:
                        f.write(response_flip.text)
                    flipkart_page_data = response_flip.text

                    soup_flip = BeautifulSoup(response_flip.text, 'html.parser')

                    # Try multiple selectors to find product listings
                    flipkart_products = (soup_flip.select("div._1AtVbE div._13oc-S")
                                         or soup_flip.select("div._1YokD2 div._1AtVbE"))

                    for product in flipkart_products:
                        title_tag = (product.select_one("div._4rR01T")
                                     or product.select_one("a.s1Q9rs")
                                     or product.select_one("div.xtbQoJ a"))
                        title = title_tag.get_text(strip=True) if title_tag else None

                        product_url = None
                        if title_tag and title_tag.has_attr("href"):
                            product_url = title_tag["href"]
                        else:
                            url_tag = product.select_one("a._1fQZEK") or product.select_one("a._2rpwqI")
                            if url_tag and url_tag.has_attr("href"):
                                product_url = url_tag["href"]
                        if product_url and product_url.startswith("/"):
                            product_url = "https://www.flipkart.com" + product_url

                        img_tag = product.select_one("img._396cs4") or product.select_one("img._2r_T1I")
                        image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""

                        # Try different price selectors
                        price_tag = (product.select_one("div._30jeq3")
                                     or product.select_one("div._30jeq3._1_WHN1"))
                        price = price_tag.get_text(strip=True) if price_tag else None

                        # Skip listings missing a title, price, or link
                        if not title or not price or not product_url:
                            continue

                        price_val = extract_price(price)
                        flipkart_list.append({
                            "title": title,
                            "image": image_url,
                            "price": price,
                            "url": product_url,
                            "price_val": price_val,
                        })

                        if len(flipkart_list) >= 6:
                            break
                else:
                    status = response_flip.status_code if response_flip is not None else "no response"
                    print(f"Flipkart scraping failed with status code: {status}")
            except Exception as e:
                print(f"Error during Flipkart scraping: {e}")

            # If we couldn't get real data, use mock data
            if not flipkart_list:
                flipkart_list = get_mock_flipkart_data(product_name)
            else:
                flipkart_list = sorted(flipkart_list, key=lambda x: x['price_val'])[:6]

    # Pass search_attempted flag to the template so it can show an appropriate message
    return render_template('index.html',
                           amazon=amazon_list,
                           flipkart=flipkart_list,
                           amazon_page_data=amazon_page_data,
                           flipkart_page_data=flipkart_page_data,
                           search_attempted=search_attempted,
                           product_name=product_name)


if __name__ == '__main__':
    app.run(debug=True)
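
# How to run (a sketch; the module filename "app.py" is an assumption, and a
# templates/index.html that consumes the context variables passed above --
# amazon, flipkart, amazon_page_data, flipkart_page_data, search_attempted,
# product_name -- must exist):
#
#   pip install flask requests beautifulsoup4
#   python app.py
#
# Then open http://127.0.0.1:5000/ (Flask's default dev address) and submit a
# form with a field named "product", which is what request.form.get('product')
# reads.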