# main.py (Corrected PermissionError and Integrated Crawl4AI as Primary)
import os
import re
import logging
import asyncio
import json
import html
import contextlib
import traceback
import urllib.parse # Added for URL encoding
from typing import Optional, Dict, Any, Tuple, Union # Added Union
# --- Frameworks ---
from starlette.applications import Starlette
from starlette.routing import Route
from starlette.responses import PlainTextResponse, JSONResponse, Response
from starlette.requests import Request
# --- Telegram Bot ---
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot
from telegram.ext import (
Application,
CommandHandler,
MessageHandler,
filters,
ContextTypes,
CallbackQueryHandler,
)
from telegram.constants import ParseMode
from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError
from telegram.request import HTTPXRequest, BaseRequest
# --- Other Libraries ---
import httpx
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from bs4 import BeautifulSoup
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
try:
import lxml
DEFAULT_PARSER = 'lxml'
except ImportError:
DEFAULT_PARSER = 'html.parser'
# --- Google Gemini ---
try:
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
_gemini_available = True
except ImportError:
genai = None
HarmCategory = None
HarmBlockThreshold = None
_gemini_available = False
# logger will be defined later, log warning after logger setup
# --- Crawl4AI (NEW Primary Scraper) ---
try:
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, BrowserConfig, CacheMode, CrawlResult
from crawl4ai.models import MarkdownGenerationResult # Specific import for type hint
_crawl4ai_available = True
except ImportError:
AsyncWebCrawler = None
CrawlerRunConfig = None
BrowserConfig = None
CacheMode = None
CrawlResult = None
MarkdownGenerationResult = None # Corrected typo
_crawl4ai_available = False
# logger will be defined later, log warning after logger setup
# --- Logging Setup ---
logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("telegram.ext").setLevel(logging.INFO)
logging.getLogger('telegram.bot').setLevel(logging.INFO)
logging.getLogger("urllib3").setLevel(logging.INFO)
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
logging.getLogger('uvicorn').setLevel(logging.INFO)
logging.getLogger('starlette').setLevel(logging.INFO)
if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
# Keep C4AI logs less verbose unless debugging
if _crawl4ai_available: logging.getLogger("crawl4ai").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
if not _gemini_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.")
if not _crawl4ai_available: logger.warning("crawl4ai library not found. Primary Web Scraping (Crawl4AI) disabled.")
# --- Global variable for PTB app ---
ptb_app: Optional[Application] = None
# --- Define a writable base directory for Crawl4AI ---
# Use /app which is the WORKDIR in the Dockerfile and is generally writable
CRAWL4AI_BASE_DIR = "/app/.crawl4ai_cache"
if _crawl4ai_available:
try:
os.makedirs(CRAWL4AI_BASE_DIR, exist_ok=True)
logger.info(f"Ensured Crawl4AI base directory exists and is writable: {CRAWL4AI_BASE_DIR}")
except Exception as e:
# Log the error but proceed; Crawl4AI might still work without cache/DB features
logger.error(f"Could not create Crawl4AI base directory {CRAWL4AI_BASE_DIR}: {e}. Crawl4AI caching/DB features might fail.")
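# Note (assumption, not part of the original setup): if /app were ever not writable in a
# particular deployment, a temporary directory could be substituted instead, e.g.
#   import tempfile
#   CRAWL4AI_BASE_DIR = os.path.join(tempfile.gettempdir(), "crawl4ai_cache")
# The directory-creation logic above would remain unchanged.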
# --- Environment Variable Loading & Configuration ---
logger.info("Attempting to load secrets and configuration...")
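# get_secret logs only the first few characters (at most 8) of each value so presence
# can be confirmed in the logs without leaking the secret itself.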
def get_secret(secret_name):
value = os.environ.get(secret_name)
if value: status = "Found"; log_length = min(len(value), 8); value_start = value[:log_length]; logger.info(f"Secret '{secret_name}': {status} (Value starts with: {value_start}...)")
else: status = "Not Found"; logger.warning(f"Secret '{secret_name}': {status}")
return value
TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Summarizer Fallback
URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # Scrape Fallback 2 (WAS 1)
SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY') # YT Fallback 1
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # YT Fallback 2 + Scrape Fallbacks 5 & 6 (WAS 4 & 5)
RAPIDAPI_KEY = get_secret('RAPIDAPI_KEY') # Scrape Fallbacks 3 & 4 (WAS 2 & 3)
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Primary Summarizer
# Models (User can still configure via env vars)
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-coder-33b-instruct") # Fallback Model
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Default YT Actor
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash-latest") # Primary Model
# Specific Actor IDs for Website Scraping Fallbacks
APIFY_CRAWLER_ACTOR_ID = "apify/website-content-crawler" # Fallback 5 (WAS 4)
APIFY_TEXT_SCRAPER_ACTOR_ID = "karamelo/text-scraper-free" # Fallback 6 (WAS 5)
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.")
if not OPENROUTER_API_KEY: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback summarization will fail.")
if not RAPIDAPI_KEY: logger.warning("⚠️ WARNING: RAPIDAPI_KEY not found. RapidAPI scraping fallbacks (3 & 4) will be unavailable.") # Updated numbers
if not APIFY_API_TOKEN: logger.warning("⚠️ WARNING: APIFY_API_TOKEN not found. YT transcript fallback (2) and Website scraping fallbacks (5 & 6) will be unavailable.") # Updated numbers
_gemini_primary_enabled = _gemini_available and bool(GEMINI_API_KEY)
if not _gemini_available: logger.warning("⚠️ WARNING: google-generativeai library missing. Gemini disabled.")
elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found or empty. Gemini disabled.")
_openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback disabled.")
_crawl4ai_primary_scrape_enabled = _crawl4ai_available # Check if library loaded
if not _crawl4ai_available: logger.error("❌ ERROR: crawl4ai library missing. Primary web scraping disabled. Will attempt fallbacks immediately.")
if not URLTOTEXT_API_KEY: logger.warning("Optional secret 'URLTOTEXT_API_KEY' not found. Web scraping fallback 2 unavailable.") # Updated number
if not SUPADATA_API_KEY: logger.warning("Optional secret 'SUPADATA_API_KEY' not found. YT transcript fallback 1 unavailable.")
# APIFY_API_TOKEN warning handled above
# RAPIDAPI_KEY warning handled above
if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
logger.info("Secret loading and configuration check finished.")
logger.info(f"Primary Web Scraper (Crawl4AI): {'ENABLED' if _crawl4ai_primary_scrape_enabled else 'DISABLED - Check Logs for Details'}")
logger.info(f"Using Gemini Model (Primary Summarizer): {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}")
logger.info(f"Using OpenRouter Model (Fallback Summarizer): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}")
logger.info(f"Using Apify Actor (YT Default): {APIFY_ACTOR_ID}")
logger.info(f"Using Apify Actor (Web Scrape Fallback 5): {APIFY_CRAWLER_ACTOR_ID}")
logger.info(f"Using Apify Actor (Web Scrape Fallback 6): {APIFY_TEXT_SCRAPER_ACTOR_ID}")
_apify_token_exists = bool(APIFY_API_TOKEN)
_urltotext_key_exists = bool(URLTOTEXT_API_KEY)
_rapidapi_key_exists = bool(RAPIDAPI_KEY)
if _gemini_primary_enabled:
try: genai.configure(api_key=GEMINI_API_KEY); logger.info("Google GenAI client configured successfully.")
except Exception as e: logger.error(f"Failed to configure Google GenAI client: {e}"); _gemini_primary_enabled = False
# --- Retry Decorator (Unchanged) ---
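# Behaviour summary: retries the wrapped bot call up to 4 times with exponential
# backoff (2-15s) on NetworkError, RetryAfter, TimedOut and BadRequest; benign
# BadRequest messages (e.g. "message is not modified") are swallowed and return None.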
@retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
async def retry_bot_operation(func, *args, **kwargs):
try: return await func(*args, **kwargs)
except BadRequest as e:
ignore_errors = [ "message is not modified", "query is too old", "message to edit not found", "chat not found", "bot was blocked by the user", ]
if any(err in str(e).lower() for err in ignore_errors): logger.warning(f"Ignoring non-critical BadRequest: {e}"); return None
logger.error(f"Potentially critical BadRequest: {e}"); raise
except TelegramError as e: logger.warning(f"TelegramError (will retry if applicable): {e}"); raise
except Exception as e: logger.error(f"Unexpected error during bot operation: {e}", exc_info=True); raise
# --- Helper Functions (Unchanged) ---
def is_youtube_url(url):
youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
match = youtube_regex.search(url); logger.debug(f"is_youtube_url '{url}': {bool(match)}"); return bool(match)
def extract_youtube_id(url):
youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
match = youtube_regex.search(url)
if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
else: logger.warning(f"Could not extract YT ID from {url}"); return None
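# Illustrative examples (not executed): both helpers accept the common URL forms, e.g.
#   extract_youtube_id("https://youtu.be/dQw4w9WgXcQ")            -> "dQw4w9WgXcQ"
#   is_youtube_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ") -> True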
# --- Content Fetching Functions ---
# --- YouTube Transcript Fetching (Unchanged) ---
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
if not video_id: logger.error("[Supadata] No video_id provided"); return None
if not api_key: logger.error("[Supadata] API key missing."); return None
logger.info(f"[YT Fallback 1] Attempting fetch for video ID: {video_id} via Supadata")
api_endpoint = "https://api.supadata.ai/v1/youtube/transcript"
params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.get(api_endpoint, headers=headers, params=params)
logger.debug(f"[Supadata] Status code {response.status_code} for {video_id}")
if response.status_code == 200:
try:
data = response.json() if response.text else None # Check if text exists before json decode
content = None
if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
if not content and response.text: content = response.text # Fallback to raw text if json parse fails or content key missing
if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
except json.JSONDecodeError: logger.warning(f"[Supadata] Received 200 but failed JSON decode for {video_id}. Using raw text if available. Response: {response.text[:200]}"); return response.text.strip() if response.text else None
except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
elif response.status_code in [401, 403]: logger.error(f"[Supadata] Auth error ({response.status_code}). Check API key."); return None
elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
except httpx.RequestError as e:
if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}")
else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
return None
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
"""Fallback YT 2: Fetches YouTube transcript using default Apify Actor."""
global APIFY_ACTOR_ID # Uses the default YT actor ID
if not video_url: logger.error("[Apify YT] No video_url provided"); return None
if not api_token: logger.error("[Apify YT] API token missing."); return None
logger.info(f"[YT Fallback 2] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
params = {"token": api_token}
payload = { "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5, "channelHandleBoolean": False, "channelNameBoolean": False, "datePublishedBoolean": False, "relativeDateTextBoolean": False, }
headers = {"Content-Type": "application/json"}
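# Note: the JSON body below is forwarded to the actor as its run input; the
# run-sync-get-dataset-items endpoint waits for the run to finish and returns
# the resulting dataset items directly.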
try:
async with httpx.AsyncClient(timeout=120.0) as client:
logger.debug(f"[Apify YT] POST Request to {sync_items_endpoint} for {video_url}")
response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
logger.debug(f"[Apify YT] Received status code {response.status_code} for {video_url}")
if response.status_code == 200:
try:
results = response.json()
if isinstance(results, list) and len(results) > 0:
item = results[0]; content = None
if "captions" in item and isinstance(item["captions"], str): content = item["captions"]
elif "text" in item and isinstance(item["text"], str): content = item["text"]
elif "transcript" in item and isinstance(item["transcript"], str): content = item["transcript"]
elif "captions" in item and isinstance(item["captions"], list):
if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text"))
elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"])
if content and isinstance(content, str): logger.info(f"[Apify YT] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
else: logger.warning(f"[Apify YT] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None
else: logger.warning(f"[Apify YT] Actor success but dataset was empty for {video_url}. Response: {results}"); return None
except json.JSONDecodeError: logger.error(f"[Apify YT] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:200]}"); return None
except Exception as e: logger.error(f"[Apify YT] Error processing success response for {video_url}: {e}", exc_info=True); return None
elif response.status_code == 400: logger.error(f"[Apify YT] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
elif response.status_code == 401: logger.error("[Apify YT] Auth error (401). Check token."); return None
elif response.status_code == 404: logger.error(f"[Apify YT] Endpoint/Actor Not Found (404). Actor: {APIFY_ACTOR_ID} Resp:{response.text[:200]}"); return None
else: logger.error(f"[Apify YT] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
except httpx.TimeoutException as e: logger.error(f"[Apify YT] Timeout during API interaction for {video_url}: {e}"); return None
except httpx.HTTPStatusError as e: logger.error(f"[Apify YT] HTTP Status Error during API interaction for {video_url}: {e}"); return None
except httpx.RequestError as e: logger.error(f"[Apify YT] Request error during API interaction for {video_url}: {e}"); return None
except Exception as e: logger.error(f"[Apify YT] Unexpected error during Apify YT call for {video_url}: {e}", exc_info=True); return None
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
global SUPADATA_API_KEY, APIFY_API_TOKEN, _apify_token_exists
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
transcript_text = None
logger.info("[Primary YT] Attempting youtube-transcript-api...")
try:
transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] )
if transcript_list: transcript_text = " ".join([item['text'] for item in transcript_list if 'text' in item])
if transcript_text: logger.info(f"[Primary YT] Success via lib for {video_id} (len: {len(transcript_text)})"); return transcript_text
else: logger.warning(f"[Primary YT] Transcript list/text empty for {video_id}"); transcript_text = None
except NoTranscriptFound: logger.warning(f"[Primary YT] No transcript found via lib for {video_id}.")
except TranscriptsDisabled: logger.warning(f"[Primary YT] Transcripts disabled via lib for {video_id}.")
except Exception as e: logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}"); transcript_text = None
if transcript_text is None:
logger.info("[Fallback YT 1] Trying Supadata API...")
if SUPADATA_API_KEY:
transcript_text = await get_transcript_via_supadata(video_id, SUPADATA_API_KEY)
if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
if transcript_text is None:
logger.info("[Fallback YT 2] Trying Apify REST API (Default YT Actor)...")
if _apify_token_exists:
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify Default YT Actor for {video_url}"); return transcript_text
else: logger.warning(f"[Fallback YT 2] Apify Default YT Actor failed or no content for {video_url}.")
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
return transcript_text
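# Retrieval order: youtube-transcript-api (primary) -> Supadata (fallback 1) ->
# Apify default YT actor (fallback 2); the first non-empty transcript wins and
# None signals to the caller that every method failed.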
# --- Website Content Fetching (MODIFIED SECTION) ---
# --- Method 0: Primary Web Scrape (Crawl4AI) ---
async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
"""Primary Web Method: Fetches and extracts content using Crawl4AI."""
global _crawl4ai_primary_scrape_enabled, CRAWL4AI_BASE_DIR # Use the defined base dir
if not _crawl4ai_primary_scrape_enabled:
logger.warning("[Web Scrape Primary] Crawl4AI called but library/driver is unavailable.")
return None
if not url: logger.error("[Web Scrape Primary] Crawl4AI: No URL provided"); return None
logger.info(f"[Web Scrape Primary] Attempting fetch and extraction via Crawl4AI for: {url}")
# Configure the crawl run - enable cache now
run_config = CrawlerRunConfig(
cache_mode=CacheMode.ENABLED, # Use cache now that base_dir is set
page_timeout=60000, # 60 sec timeout
verbose=False, # Keep logs cleaner
scan_full_page=True, # Try to load dynamic content by scrolling
remove_overlay_elements=True, # Try to remove cookie banners/popups
# Consider adding markdown generation strategy if needed later
# from crawl4ai.content_filter_strategy import PruningContentFilter
# from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
# md_generator = DefaultMarkdownGenerator(content_filter=PruningContentFilter())
# markdown_generator=md_generator,
)
# BrowserConfig defaults are usually okay (headless chromium)
# browser_config = BrowserConfig(headless=True, verbose=False)
extracted_text: Optional[str] = None
try:
# Use context manager and provide base_directory to fix PermissionError
# Pass browser_config if needed: AsyncWebCrawler(config=browser_config, base_directory=CRAWL4AI_BASE_DIR)
async with AsyncWebCrawler(base_directory=CRAWL4AI_BASE_DIR) as crawler:
logger.debug(f"[Web Scrape Primary] Calling Crawl4AI crawler.arun for {url}")
result: CrawlResult = await crawler.arun(url=url, config=run_config)
logger.debug(f"[Web Scrape Primary] Crawl4AI arun completed. Success: {result.success}, Status: {result.status_code}")
if result.success:
# Check for markdown generation result first (preferred)
if result.markdown and isinstance(result.markdown, MarkdownGenerationResult):
# Prioritize 'fit_markdown' if available and substantial
if result.markdown.fit_markdown and isinstance(result.markdown.fit_markdown, str) and len(result.markdown.fit_markdown.strip()) > 50:
extracted_text = result.markdown.fit_markdown.strip()
logger.debug(f"[Web Scrape Primary] Using 'fit_markdown' from MarkdownGenerationResult for {url}")
# Fallback to 'raw_markdown' if 'fit_markdown' is missing/short
elif result.markdown.raw_markdown and isinstance(result.markdown.raw_markdown, str):
extracted_text = result.markdown.raw_markdown.strip()
logger.debug(f"[Web Scrape Primary] Using 'raw_markdown' (fit_markdown unavailable/short) for {url}")
else:
logger.warning(f"[Web Scrape Primary] Markdown object present but no usable text content (fit/raw) for {url}. Trying cleaned_html.")
# Fall through to cleaned_html parsing if markdown is unusable
# Handle if result.markdown is just a string (older version compatibility?)
elif result.markdown and isinstance(result.markdown, str):
extracted_text = result.markdown.strip()
logger.debug(f"[Web Scrape Primary] Using direct result.markdown string for {url}")
# If no markdown or unusable markdown, try parsing cleaned_html
if not extracted_text and result.cleaned_html:
logger.warning(f"[Web Scrape Primary] No usable markdown found, parsing cleaned_html with BS4 for {url}")
try:
# Use a simple BS4 parse as a fallback within Crawl4AI's result
soup = BeautifulSoup(result.cleaned_html, DEFAULT_PARSER)
extracted_text = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip())
except Exception as bs_err:
logger.error(f"[Web Scrape Primary] Error parsing Crawl4AI's cleaned_html with BS4 for {url}: {bs_err}")
extracted_text = None # Ensure it's None if parsing fails
# Final check on extracted text length
if extracted_text and len(extracted_text) > 50: # Check for meaningful content length
logger.info(f"[Web Scrape Primary] Success via Crawl4AI for {url}. Length: {len(extracted_text)}")
return extracted_text
else:
content_len = len(extracted_text) if extracted_text else 0
logger.warning(f"[Web Scrape Primary] Crawl4AI success but extracted text too short or empty for {url}. Length: {content_len}. Will try fallbacks.")
return None # Return None to trigger fallbacks
else:
error_msg = result.error_message or f"Crawl failed (status code: {result.status_code})"
logger.error(f"[Web Scrape Primary] Crawl4AI failed for {url}. Error: {error_msg}. Will try fallbacks.")
return None # Return None to trigger fallbacks
except asyncio.TimeoutError:
logger.error(f"[Web Scrape Primary] Timeout error during Crawl4AI crawl for {url}. Will try fallbacks.")
return None
except ImportError as ie:
if "playwright" in str(ie).lower():
logger.critical(f"[Web Scrape Primary] Playwright library missing or drivers not installed! Run 'pip install playwright && playwright install --with-deps'. Error: {ie}")
_crawl4ai_primary_scrape_enabled = False # Disable future attempts
else:
logger.error(f"[Web Scrape Primary] Unexpected ImportError during Crawl4AI execution for {url}: {ie}", exc_info=True)
return None # Return None to trigger fallbacks
except Exception as e:
# Catch potential Playwright errors about missing executables explicitly
if "playwright" in str(e).lower() and ("install" in str(e).lower() or "executable" in str(e).lower() or "path" in str(e).lower()):
logger.critical("[Web Scrape Primary] Playwright drivers likely missing! Run 'playwright install --with-deps' in your environment. Disabling Crawl4AI.")
_crawl4ai_primary_scrape_enabled = False # Disable future attempts
else:
logger.error(f"[Web Scrape Primary] Unexpected error during Crawl4AI execution for {url}: {e}", exc_info=True)
return None # Return None to trigger fallbacks
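# Illustrative usage (sketch only, not executed at import time): callers treat a
# None return as "try the next scraper", e.g.
#   content = await get_website_content_via_crawl4ai(url)
#   if content is None:
#       content = await get_website_content_direct_bs4(url)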
# --- Fallback 1: Direct Fetch + BS4 (Previously Primary) ---
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
"""Directly fetches URL content using httpx. (Fallback Web Method 1 - Fetching part)"""
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
try:
async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
logger.debug(f"[Web Scrape Fallback 1] Sending GET request to {url}")
response = await client.get(url)
logger.debug(f"[Web Scrape Fallback 1] Received response {response.status_code} from {url}")
response.raise_for_status()
content_type = response.headers.get('content-type', '').lower()
if 'html' not in content_type and 'xml' not in content_type:
logger.warning(f"[Web Scrape Fallback 1] Non-HTML/XML content type received from {url}: {content_type}")
if 'text/plain' in content_type: logger.info(f"[Web Scrape Fallback 1] Content type is text/plain for {url}, reading."); return response.text
return None
try: return response.text
except Exception as e: logger.error(f"[Web Scrape Fallback 1] Error decoding response text for {url}: {e}"); return None
except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape Fallback 1] HTTP error {e.response.status_code} fetching {url}: {e}")
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 1] Timeout error fetching {url}")
except httpx.TooManyRedirects: logger.error(f"[Web Scrape Fallback 1] Too many redirects fetching {url}")
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 1] Request error fetching {url}: {e}")
except Exception as e: logger.error(f"[Web Scrape Fallback 1] Unexpected error fetching {url}: {e}", exc_info=True)
return None
async def get_website_content_direct_bs4(url: str) -> Optional[str]:
"""Fallback 1: Fetches HTML directly and parses with BeautifulSoup."""
if not url: logger.error("[Web Scrape Fallback 1] No URL provided"); return None
logger.info(f"[Web Scrape Fallback 1] Attempting direct fetch and parse for: {url}")
html_content = await fetch_url_content_for_scrape(url)
if not html_content: logger.warning(f"[Web Scrape Fallback 1] Direct fetch failed for {url}."); return None
try:
def parse_html(content: str) -> Optional[str]:
try:
soup = BeautifulSoup(content, DEFAULT_PARSER)
# More aggressive removal of potentially noisy tags
for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "picture", "source", "map", "area", "details", "dialog"]):
element.extract()
# Try common main content containers
main_content = soup.find('main') or soup.find('article') or soup.find(role='main') or soup.find(id=re.compile(r'content|main|body|post', re.I)) or soup.find(class_=re.compile(r'content|main|body|article|post|entry', re.I))
target_element = main_content if main_content else soup.body
if not target_element:
logger.warning(f"[Web Scrape Fallback 1 Parse] Could not find body or main content candidates for {url}")
# Fallback: Get text from the whole soup if no specific container found
text_from_root = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip())
if text_from_root and len(text_from_root) > 50:
logger.warning(f"[Web Scrape Fallback 1 Parse] Using text from root as fallback for {url}. Length: {len(text_from_root)}")
return text_from_root
return None # Really couldn't find anything useful
# Extract text from the chosen element (main_content or body)
lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
text = " ".join(lines)
# Check if the extracted text is meaningful
if not text or len(text) < 50: # Increased threshold slightly
logger.warning(f"[Web Scrape Fallback 1 Parse] Extracted text from target element too short or empty for {url}. Length: {len(text)}")
# As a final attempt, try getting text from the entire soup again
text_from_root_final = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip())
if text_from_root_final and len(text_from_root_final) > 50:
logger.warning(f"[Web Scrape Fallback 1 Parse] Reverting to text from root as final attempt for {url}. Length: {len(text_from_root_final)}")
return text_from_root_final
return None # Give up if even root text is too short
return text # Return the text from the target element
except Exception as parse_e:
logger.error(f"[Web Scrape Fallback 1 Parse] BS4 parsing error for {url}: {parse_e}", exc_info=False)
return None
# Run parsing in a separate thread to avoid blocking asyncio loop
text_content = await asyncio.to_thread(parse_html, html_content)
if text_content:
logger.info(f"[Web Scrape Fallback 1] Success via direct fetch & parse for {url} (len: {len(text_content)})")
return text_content
else:
logger.warning(f"[Web Scrape Fallback 1] Parsing failed or yielded no meaningful content for {url}.")
return None
except Exception as e:
logger.error(f"[Web Scrape Fallback 1] Unexpected error during parsing phase for {url}: {e}", exc_info=True)
return None
# --- Fallback 2: urltotext.com API ---
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
"""Fallback 2: Fetches website content using urltotext.com API."""
if not url: logger.error("[Web Scrape Fallback 2] No URL"); return None
if not api_key: logger.error("[Web Scrape Fallback 2] urltotext.com API key missing."); return None
logger.info(f"[Web Scrape Fallback 2] Attempting fetch for: {url} using urltotext.com API")
api_endpoint = "https://urltotext.com/api/v1/urltotext/"
payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" }
try:
async with httpx.AsyncClient(timeout=45.0) as client:
logger.debug(f"[Web Scrape Fallback 2] Sending request to urltotext.com API for {url}")
response = await client.post(api_endpoint, headers=headers, json=payload)
logger.debug(f"[Web Scrape Fallback 2] Received status {response.status_code} from urltotext.com API for {url}")
if response.status_code == 200:
try:
data = response.json()
content = data.get("data", {}).get("content"); credits = data.get("credits_used", "N/A"); warning = data.get("data", {}).get("warning")
if warning: logger.warning(f"[Web Scrape Fallback 2] urltotext.com API Warning for {url}: {warning}")
if content and isinstance(content, str) and len(content.strip()) > 30: # Check length after stripping
logger.info(f"[Web Scrape Fallback 2] Success via urltotext.com API for {url}. Len: {len(content.strip())}. Credits: {credits}")
return content.strip()
else:
content_len = len(content.strip()) if content and isinstance(content, str) else 0
logger.warning(f"[Web Scrape Fallback 2] urltotext.com API success but content empty/short for {url}. Len: {content_len}. Resp: {data}"); return None
except json.JSONDecodeError: logger.error(f"[Web Scrape Fallback 2] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
except Exception as e: logger.error(f"[Web Scrape Fallback 2] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[Web Scrape Fallback 2] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
else: logger.error(f"[Web Scrape Fallback 2] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 2] Timeout connecting to urltotext.com API for {url}"); return None
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 2] Request error connecting to urltotext.com API for {url}: {e}"); return None
except Exception as e: logger.error(f"[Web Scrape Fallback 2] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
# --- Fallback 3: Scraper's Proxy Parser via RapidAPI ---
async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Optional[str]:
"""Fallback 3: Fetches website content using Scraper's Proxy Parser via RapidAPI."""
if not url: logger.error("[Web Scrape Fallback 3] No URL provided"); return None
if not api_key: logger.error("[Web Scrape Fallback 3] RapidAPI key missing."); return None
logger.info(f"[Web Scrape Fallback 3] Attempting fetch for: {url} using Scraper's Proxy Parser API")
api_host = "scrapers-proxy2.p.rapidapi.com"
encoded_url = urllib.parse.quote(url, safe='')
api_endpoint = f"https://{api_host}/parser?url={encoded_url}&auto_detect=true"
headers = { "x-rapidapi-host": api_host, "x-rapidapi-key": api_key, "accept-encoding": "gzip" }
try:
async with httpx.AsyncClient(timeout=40.0) as client:
logger.debug(f"[Web Scrape Fallback 3] Sending GET request to {api_host} for {url}")
response = await client.get(api_endpoint, headers=headers)
logger.debug(f"[Web Scrape Fallback 3] Received status {response.status_code} from {api_host} for {url}")
if response.status_code == 200:
try:
data = response.json()
content = data.get("content"); title = data.get("title"); extracted_text = ""
if title and isinstance(title, str): extracted_text += title.strip() + ". "
if content and isinstance(content, str): extracted_text += content.strip()
extracted_text = extracted_text.strip() # Strip final result
if extracted_text and len(extracted_text) > 30:
logger.info(f"[Web Scrape Fallback 3] Success via Scraper's Proxy API for {url}. Len: {len(extracted_text)}")
return extracted_text
else:
logger.warning(f"[Web Scrape Fallback 3] Scraper's Proxy API success but content/title too short/empty for {url}. Keys: {list(data.keys())}. Length: {len(extracted_text)}")
return None
except json.JSONDecodeError: logger.error(f"[Web Scrape Fallback 3] Failed JSON decode Scraper's Proxy API for {url}. Status:{response.status_code}. Resp:{response.text[:500]}"); return None
except Exception as e: logger.error(f"[Web Scrape Fallback 3] Error processing Scraper's Proxy API success response for {url}: {e}", exc_info=True); return None
elif response.status_code == 401: logger.error(f"[Web Scrape Fallback 3] Auth error (401) with {api_host}. Check RapidAPI key."); return None
elif response.status_code == 403: logger.error(f"[Web Scrape Fallback 3] Forbidden (403) from {api_host}. Check subscription/limits."); return None
elif response.status_code == 429: logger.warning(f"[Web Scrape Fallback 3] Rate Limit (429) from {api_host}."); return None
elif response.status_code >= 500: logger.error(f"[Web Scrape Fallback 3] Server error ({response.status_code}) from {api_host}. Resp:{response.text[:200]}"); return None
else: logger.error(f"[Web Scrape Fallback 3] Unexpected status {response.status_code} from {api_host} API for {url}. Resp:{response.text[:200]}"); return None
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 3] Timeout connecting to {api_host} API for {url}"); return None
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 3] Request error connecting to {api_host} API for {url}: {e}"); return None
except Exception as e: logger.error(f"[Web Scrape Fallback 3] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None
# --- Fallback 4: AI Web Scraper via RapidAPI ---
async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Optional[str]:
"""Fallback 4: Fetches website content using AI Web Scraper via RapidAPI."""
if not url: logger.error("[Web Scrape Fallback 4] No URL provided"); return None
if not api_key: logger.error("[Web Scrape Fallback 4] RapidAPI key missing."); return None
logger.info(f"[Web Scrape Fallback 4] Attempting fetch for: {url} using AI Web Scraper API")
api_host = "ai-web-scraper.p.rapidapi.com"; api_endpoint = f"https://{api_host}/extract_content/v1"
headers = { 'Content-Type': 'application/x-www-form-urlencoded', 'x-rapidapi-host': api_host, 'x-rapidapi-key': api_key }
payload = {'url': url}
try:
async with httpx.AsyncClient(timeout=45.0) as client:
logger.debug(f"[Web Scrape Fallback 4] Sending POST request to {api_host} for {url}")
response = await client.post(api_endpoint, headers=headers, data=payload)
logger.debug(f"[Web Scrape Fallback 4] Received status {response.status_code} from {api_host} for {url}")
if response.status_code == 200:
try:
data = response.json(); content = None
if isinstance(data, dict): content = data.get("content") or data.get("text") or data.get("extracted_text") or data.get("result")
elif isinstance(data, str): content = data
if content and isinstance(content, str):
content_stripped = content.strip()
if len(content_stripped) > 30:
logger.info(f"[Web Scrape Fallback 4] Success via AI Web Scraper API for {url}. Len: {len(content_stripped)}")
return content_stripped
else:
logger.warning(f"[Web Scrape Fallback 4] AI Web Scraper API success but content too short after stripping for {url}. Len: {len(content_stripped)}")
return None
else:
keys_info = f"Keys: {list(data.keys())}" if isinstance(data, dict) else f"Type: {type(data)}"
logger.warning(f"[Web Scrape Fallback 4] AI Web Scraper API success but content empty/invalid format for {url}. {keys_info}")
return None
except json.JSONDecodeError:
raw_text = response.text.strip()
if raw_text and len(raw_text) > 30:
logger.warning(f"[Web Scrape Fallback 4] Failed JSON decode for AI Web Scraper, but found raw text. Status:{response.status_code}. Using raw text. Len: {len(raw_text)}")
return raw_text
else:
logger.error(f"[Web Scrape Fallback 4] Failed JSON decode AI Web Scraper API for {url}. Status:{response.status_code}. Resp empty/short:{raw_text[:500]}")
return None
except Exception as e: logger.error(f"[Web Scrape Fallback 4] Error processing AI Web Scraper API success response for {url}: {e}", exc_info=True); return None
elif response.status_code == 401: logger.error(f"[Web Scrape Fallback 4] Auth error (401) with {api_host}. Check RapidAPI key."); return None
elif response.status_code == 403: logger.error(f"[Web Scrape Fallback 4] Forbidden (403) from {api_host}. Check subscription/limits."); return None
elif response.status_code == 429: logger.warning(f"[Web Scrape Fallback 4] Rate Limit (429) from {api_host}."); return None
elif response.status_code >= 500: logger.error(f"[Web Scrape Fallback 4] Server error ({response.status_code}) from {api_host}. Resp:{response.text[:200]}"); return None
else: logger.error(f"[Web Scrape Fallback 4] Unexpected status {response.status_code} from {api_host} API for {url}. Resp:{response.text[:200]}"); return None
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 4] Timeout connecting to {api_host} API for {url}"); return None
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 4] Request error connecting to {api_host} API for {url}: {e}"); return None
except Exception as e: logger.error(f"[Web Scrape Fallback 4] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None
# --- Fallback 5 & 6: Apify Website Scraping ---
async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: str, actor_name: str, fallback_num: int) -> Optional[str]:
"""Generic function to run an Apify actor and get text content."""
if not url: logger.error(f"[{actor_name} - FB{fallback_num}] No URL provided"); return None
if not api_token: logger.error(f"[{actor_name} - FB{fallback_num}] API token missing."); return None
logger.info(f"[{actor_name} - FB{fallback_num}] Attempting fetch for URL: {url} (Actor: {actor_id})")
sync_items_endpoint = f"https://api.apify.com/v2/acts/{actor_id}/run-sync-get-dataset-items"; params = {"token": api_token}
# Define different inputs based on actor
run_input: Dict[str, Any]
if actor_id == APIFY_TEXT_SCRAPER_ACTOR_ID:
# Input for Text Scraper Free
run_input = { "urls": [url] }
logger.debug(f"[{actor_name} - FB{fallback_num}] Using simplified input for Text Scraper: {run_input}")
elif actor_id == APIFY_CRAWLER_ACTOR_ID:
# Input for Website Content Crawler (limit crawl depth)
run_input = {
"startUrls": [{"url": url}],
"maxCrawlPages": 1, # Only crawl the start URL
"maxCrawlDepth": 0, # Do not follow links
"crawlerType": "playwright:firefox", # Or chromium
"maxResults": 1,
# You might need to add parameters to extract specific content if default fails
# e.g., "pageFunction": "async function pageFunction(context) { return { text: document.body.innerText }; }"
}
logger.debug(f"[{actor_name} - FB{fallback_num}] Using input for Website Content Crawler: {run_input}")
else:
logger.error(f"[{actor_name} - FB{fallback_num}] Unknown Apify actor ID: {actor_id}. Cannot determine input format.")
return None
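# Input shapes above: Text Scraper Free only needs {"urls": [url]}, while Website
# Content Crawler takes startUrls plus maxCrawlPages=1 / maxCrawlDepth=0 so that
# only the submitted page is fetched rather than a full site crawl.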
headers = {"Content-Type": "application/json"}
try:
async with httpx.AsyncClient(timeout=180.0) as client: # Increased timeout for Apify actors
logger.debug(f"[{actor_name} - FB{fallback_num}] POST Request to {sync_items_endpoint} for {url}")
response = await client.post(sync_items_endpoint, headers=headers, params=params, json=run_input)
logger.debug(f"[{actor_name} - FB{fallback_num}] Received status code {response.status_code} for {url}")
if response.status_code == 200:
try:
results = response.json()
if isinstance(results, list) and len(results) > 0:
item = results[0]; content = None
# Prioritize 'text', then 'content', then 'markdown'
if "text" in item and isinstance(item["text"], str): content = item["text"]
elif "content" in item and isinstance(item["content"], str): content = item["content"]
elif "markdown" in item and isinstance(item["markdown"], str): content = item["markdown"]
# Fallback: Parse 'html' if other fields are missing
elif "html" in item and isinstance(item["html"], str):
logger.warning(f"[{actor_name} - FB{fallback_num}] No 'text', 'content', or 'markdown' found, parsing 'html'.")
try:
soup = BeautifulSoup(item["html"], DEFAULT_PARSER)
content = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip())
except Exception as bs_err:
logger.error(f"[{actor_name} - FB{fallback_num}] Error parsing Apify HTML with BS4: {bs_err}")
content = None # Ensure content is None if parsing fails
if content and isinstance(content, str):
content_stripped = content.strip()
if len(content_stripped) > 50: # Increased length check
logger.info(f"[{actor_name} - FB{fallback_num}] Success via REST for {url}. Length: {len(content_stripped)}")
return content_stripped
else:
logger.warning(f"[{actor_name} - FB{fallback_num}] Dataset item parsed but text content too short after stripping for {url}. Length: {len(content_stripped)}")
return None
else:
logger.warning(f"[{actor_name} - FB{fallback_num}] Dataset item parsed but text content empty or invalid format for {url}. Item keys: {list(item.keys())}")
return None
else: logger.warning(f"[{actor_name} - FB{fallback_num}] Actor success but dataset was empty for {url}. Response: {results}"); return None
except json.JSONDecodeError: logger.error(f"[{actor_name} - FB{fallback_num}] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:200]}"); return None
except Exception as e: logger.error(f"[{actor_name} - FB{fallback_num}] Error processing success response for {url}: {e}", exc_info=True); return None
elif response.status_code == 400: logger.error(f"[{actor_name} - FB{fallback_num}] Bad Request (400) for {url}. Check run_input. Resp:{response.text[:200]}"); return None
elif response.status_code == 401: logger.error(f"[{actor_name} - FB{fallback_num}] Auth error (401). Check token."); return None
elif response.status_code == 404: logger.error(f"[{actor_name} - FB{fallback_num}] Endpoint/Actor Not Found (404). Actor: {actor_id} Resp:{response.text[:200]}"); return None
else: logger.error(f"[{actor_name} - FB{fallback_num}] Unexpected status {response.status_code} for {url}. Resp:{response.text[:200]}"); return None
except httpx.TimeoutException as e: logger.error(f"[{actor_name} - FB{fallback_num}] Timeout during API interaction for {url}: {e}"); return None
except httpx.HTTPStatusError as e: logger.error(f"[{actor_name} - FB{fallback_num}] HTTP Status Error during API interaction for {url}: {e}"); return None
except httpx.RequestError as e: logger.error(f"[{actor_name} - FB{fallback_num}] Request error during API interaction for {url}: {e}"); return None
except Exception as e: logger.error(f"[{actor_name} - FB{fallback_num}] Unexpected error during {actor_name} call for {url}: {e}", exc_info=True); return None
async def get_website_content_via_apify_crawler(url: str, api_token: str) -> Optional[str]:
"""Fallback 5: Fetches website content using Apify Website Content Crawler."""
return await _run_apify_actor_for_web_content(
url=url, api_token=api_token, actor_id=APIFY_CRAWLER_ACTOR_ID,
actor_name="Apify Crawler", fallback_num=5
)
async def get_website_content_via_apify_text_scraper(url: str, api_token: str) -> Optional[str]:
"""Fallback 6: Fetches website content using Apify Text Scraper Free."""
return await _run_apify_actor_for_web_content(
url=url, api_token=api_token, actor_id=APIFY_TEXT_SCRAPER_ACTOR_ID,
actor_name="Apify Text Scraper", fallback_num=6
)
# --- Summarization Functions (Unchanged) ---
async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
"""Internal function to call Gemini API. Returns (summary, error_message)."""
global GEMINI_MODEL, _gemini_primary_enabled
if not _gemini_primary_enabled:
logger.error("[Gemini Primary] Called but is disabled.");
return None, "Error: Primary AI service (Gemini) not configured/available."
logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
if summary_type == "paragraph":
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
"• Clear and simple language suitable for someone unfamiliar with the topic.\n"
"• Uses British English spellings throughout.\n"
"• Straightforward and understandable vocabulary; avoid complex terms.\n"
"• Presented as ONE SINGLE PARAGRAPH.\n"
"• No more than 85 words maximum; but does not have to be exactly 85.\n"
"• Considers the entire text content equally.\n"
"• Uses semicolons (;) instead of em dashes (— or –).\n"
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n"
"Here is the text to summarise:")
else: # points summary
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this format:\n\n"
"• For each distinct topic or section identified in the text, create a heading.\n"
"• Each heading MUST be plain text without any formatting (e.g., Section Title).\n"
"• Immediately following each heading, list the key points as a bulleted list.\n"
"• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n"
"• The text within each bullet point should NOT contain any bold formatting.\n"
"• IMPORTANT: Never use bold formatting (double asterisks) within the text of the bullet points themselves.\n"
"• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n"
"• Use British English spellings throughout.\n"
"• Avoid overly complex or advanced vocabulary.\n"
"• Keep bullet points concise.\n"
"• Ensure the entire summary takes no more than two minutes to read.\n"
"• Consider the entire text's content, not just the beginning or a few topics.\n"
"• Use semicolons (;) instead of em dashes (— or –).\n"
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n"
"Here is the text to summarise:")
# Gemini 1.5 Flash context window is large, but let's keep a reasonable practical limit
MAX_INPUT_LENGTH_GEMINI = 900000 # ~900k characters (roughly 200-250k tokens), comfortably within Gemini 1.5 Flash's ~1M-token context window
if len(text) > MAX_INPUT_LENGTH_GEMINI:
logger.warning(f"[Gemini Primary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH_GEMINI}). Truncating.");
text = text[:MAX_INPUT_LENGTH_GEMINI] + "... (Content truncated)"
full_prompt = f"{prompt}\n\n{text}"
safety_settings = { HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, }
# Check if HARM_CATEGORY_CIVIC_INTEGRITY exists before adding (might vary by SDK version/region)
# if hasattr(HarmCategory, 'HARM_CATEGORY_CIVIC_INTEGRITY'):
# safety_settings[HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY] = HarmBlockThreshold.BLOCK_NONE
logger.debug(f"[Gemini Primary] Using safety settings: { {k.name: v.name for k, v in safety_settings.items()} }")
try:
logger.debug(f"[Gemini Primary] Initializing model {GEMINI_MODEL}")
model = genai.GenerativeModel(GEMINI_MODEL)
logger.info(f"[Gemini Primary] Sending request to Gemini ({GEMINI_MODEL})...")
request_options = {"timeout": 120} # 120 seconds timeout
response = await model.generate_content_async(
full_prompt,
generation_config=genai.types.GenerationConfig(), # Use default generation config
safety_settings=safety_settings,
request_options=request_options
)
logger.info("[Gemini Primary] Received response from Gemini.")
# Check for blocking based on prompt feedback first
if response.prompt_feedback and response.prompt_feedback.block_reason:
block_reason_str = getattr(response.prompt_feedback.block_reason, 'name', str(response.prompt_feedback.block_reason))
logger.warning(f"[Gemini Primary] Request blocked by API based on prompt feedback. Reason: {block_reason_str}");
return None, f"Sorry, the primary AI model ({GEMINI_MODEL}) blocked the request (Reason: {block_reason_str})."
# If not blocked by prompt, check candidate content and finish reason
summary = None
finish_reason_str = 'UNKNOWN'
safety_block_reason = None
if response.candidates:
candidate = response.candidates[0]
finish_reason_enum = getattr(candidate, 'finish_reason', None)
finish_reason_str = getattr(finish_reason_enum, 'name', 'N/A') if finish_reason_enum else 'N/A'
if finish_reason_str == 'SAFETY':
safety_ratings_str = "N/A"
if hasattr(candidate, 'safety_ratings'):
safety_ratings_str = ", ".join([f"{rating.category.name}: {rating.probability.name}" for rating in candidate.safety_ratings])
safety_block_reason = f"SAFETY (Ratings: [{safety_ratings_str}])"
logger.warning(f"[Gemini Primary] Candidate blocked due to SAFETY. Finish Reason: {finish_reason_str}. {safety_block_reason}")
# Don't return yet, check if response.text fallback works
elif finish_reason_str not in ['STOP', 'MAX_TOKENS', 'N/A', None]: # Log unusual reasons
logger.warning(f"[Gemini Primary] Candidate finished with non-standard reason: {finish_reason_str}")
# Try extracting text from the candidate parts
if candidate.content and candidate.content.parts:
summary = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
# Fallback to response.text if candidate parsing failed or was blocked (but prompt wasn't)
if summary is None:
try:
# This might raise ValueError if the response was fully blocked (e.g., safety)
summary = response.text
if safety_block_reason: # If we got here despite a safety block, log it
logger.warning(f"[Gemini Primary] Got text via response.text despite SAFETY block reason: {safety_block_reason}")
except ValueError as e:
logger.warning(f"[Gemini Primary] Error accessing response.text (likely blocked response): {e}. Final Finish Reason: {finish_reason_str}")
summary = None # Ensure summary is None if .text fails
# Final check and return
if summary:
logger.info(f"[Gemini Primary] Success generating summary. Finish Reason: {finish_reason_str}. Output len: {len(summary)}");
return summary.strip(), None
else:
# Provide a more specific error if safety was the likely cause
error_msg = f"Sorry, the primary AI model ({GEMINI_MODEL}) did not provide a summary (Finish Reason: {finish_reason_str})."
if safety_block_reason:
error_msg = f"Sorry, the primary AI model ({GEMINI_MODEL}) blocked the response due to safety filters ({finish_reason_str})."
logger.warning(f"[Gemini Primary] Gemini returned empty summary or content was blocked. Final Finish Reason: {finish_reason_str}. Safety Block Reason: {safety_block_reason}");
return None, error_msg
except AttributeError as ae:
# This might happen if the SDK response structure changes
logger.error(f"[Gemini Primary] AttributeError during Gemini response processing: {ae}. SDK might be incompatible or response structure unexpected.", exc_info=True)
return None, f"Sorry, error processing response from the primary AI ({GEMINI_MODEL})."
except Exception as e:
# Catch potential network errors, timeouts, etc.
logger.error(f"[Gemini Primary] Unexpected error during Gemini API call: {e}", exc_info=True)
return None, f"Sorry, unexpected error using primary AI ({GEMINI_MODEL})."
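# Illustrative usage: `summary, err = await _call_gemini(article_text, "paragraph")`;
# exactly one of the two return values is None (the summary on success, an error message on failure).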
async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
"""Internal function to call OpenRouter API (Fallback). Returns (summary, error_message)."""
global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
if not _openrouter_fallback_enabled:
logger.error("[OpenRouter Fallback] Called but is disabled.");
return None, "Error: Fallback AI service (OpenRouter) not configured/available."
logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
if summary_type == "paragraph":
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
"• Clear and simple language suitable for someone unfamiliar with the topic.\n"
"• Uses British English spellings throughout.\n"
"• Straightforward and understandable vocabulary; avoid complex terms.\n"
"• Presented as ONE SINGLE PARAGRAPH.\n"
"• No more than 85 words maximum; but does not have to be exactly 85.\n"
"• Considers the entire text content equally.\n"
"• Uses semicolons (;) instead of em dashes (— or –).\n"
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n"
"Here is the text to summarise:")
else: # points summary
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this format:\n\n"
"• For each distinct topic or section identified in the text, create a heading.\n"
"• Each heading MUST be plain text without any formatting (e.g., Section Title).\n"
"• Immediately following each heading, list the key points as a bulleted list.\n"
"• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n"
"• The text within each bullet point should NOT contain any bold formatting.\n"
"• IMPORTANT: Never use bold formatting (double asterisks) within the text of the bullet points themselves.\n"
"• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n"
"• Use British English spellings throughout.\n"
"• Avoid overly complex or advanced vocabulary.\n"
"• Keep bullet points concise.\n"
"• Ensure the entire summary takes no more than two minutes to read.\n"
"• Consider the entire text's content, not just the beginning or a few topics.\n"
"• Use semicolons (;) instead of em dashes (— or –).\n"
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n"
"Here is the text to summarise:")
# Check model context window if known, otherwise use a generous limit
# Deepseek Coder 33B has 16k context, let's aim lower for safety
MAX_INPUT_LENGTH_OR = 60000 # Roughly 15k tokens
if len(text) > MAX_INPUT_LENGTH_OR:
logger.warning(f"[OpenRouter Fallback] Input length ({len(text)}) exceeds estimated limit ({MAX_INPUT_LENGTH_OR}) for {OPENROUTER_MODEL}. Truncating.");
text = text[:MAX_INPUT_LENGTH_OR] + "... (Content truncated)"
full_prompt = f"{prompt}\n\n{text}"
headers = {
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"Content-Type": "application/json",
# Optional, but good practice for OpenRouter identification
"HTTP-Referer": "https://github.com/your-repo-or-app-name", # Replace with your repo/app URL
"X-Title": "TelegramSummariserBot" # Replace with your app name
}
payload = {
"model": OPENROUTER_MODEL,
"messages": [{"role": "user", "content": full_prompt}]
# Add optional parameters like temperature, max_tokens if needed
# "temperature": 0.7,
# "max_tokens": 1024,
}
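# For reference, a successful chat-completions response is expected to look roughly like
# (illustrative, abridged):
#   {"choices": [{"message": {"role": "assistant", "content": "..."}, "finish_reason": "stop"}]}
# which is the structure the response handling below assumes.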
openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
# Increased read timeout as some models can take time
api_timeouts = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=60.0);
response = None
try:
async with httpx.AsyncClient(timeout=api_timeouts) as client:
logger.info(f"[OpenRouter Fallback] Sending request to OpenRouter ({OPENROUTER_MODEL})...")
response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
logger.info(f"[OpenRouter Fallback] Received response. Status: {response.status_code}")
if response.status_code == 200:
try:
data = response.json()
if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
choice = data["choices"][0]
message = choice.get("message")
finish_reason = choice.get("finish_reason", "N/A")
if message and isinstance(message, dict):
summary = message.get("content")
if summary:
logger.info(f"[OpenRouter Fallback] Success. Finish: {finish_reason}. Output len: {len(summary)}")
return summary.strip(), None
else:
# Model might return empty content successfully
logger.warning(f"[OpenRouter Fallback] Success but content empty. Finish: {finish_reason}. Resp: {data}")
return None, f"Fallback AI ({OPENROUTER_MODEL}) returned empty summary (Finish: {finish_reason})."
else:
logger.error(f"[OpenRouter Fallback] Unexpected message structure: {message}. Finish: {finish_reason}. Full: {data}")
return None, "Could not parse fallback AI response (message format)."
else:
# Check for specific OpenRouter errors in the response body
error_details = data.get("error", {})
error_msg = error_details.get("message", "Unknown error in response structure")
logger.error(f"[OpenRouter Fallback] Unexpected choices structure or error in response. Error: {error_msg}. Full: {data}")
return None, f"Fallback AI response error: {error_msg}."
except json.JSONDecodeError:
logger.error(f"[OpenRouter Fallback] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:500]}")
return None, "Failed to understand fallback AI response."
except Exception as e:
logger.error(f"[OpenRouter Fallback] Error processing success response: {e}", exc_info=True)
return None, "Error processing fallback AI response."
# Handle specific HTTP error codes
elif response.status_code == 401:
logger.error("[OpenRouter Fallback] API key invalid or missing (401).")
return None, "Fallback AI authentication failed (check key)."
elif response.status_code == 402:
logger.error("[OpenRouter Fallback] Payment Required/Quota Exceeded (402).")
return None, f"Fallback AI ({OPENROUTER_MODEL}) quota/limit reached."
elif response.status_code == 429:
logger.warning(f"[OpenRouter Fallback] Rate Limit Exceeded (429) for {OPENROUTER_MODEL}.")
return None, f"Fallback AI ({OPENROUTER_MODEL}) is rate-limited. Try again later."
elif response.status_code == 500:
logger.error(f"[OpenRouter Fallback] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}")
return None, f"Fallback AI service ({OPENROUTER_MODEL}) encountered an internal error."
else:
# General unexpected status code
error_info = ""
try: # Try to get error message from JSON response
error_info = response.json().get("error", {}).get("message", "")
except Exception: pass
logger.error(f"[OpenRouter Fallback] Unexpected status {response.status_code}. Error: '{error_info}' Resp:{response.text[:500]}");
return None, f"Fallback AI ({OPENROUTER_MODEL}) returned error status {response.status_code}."
except httpx.TimeoutException as e:
logger.error(f"[OpenRouter Fallback] Timeout error ({type(e)}) connecting to or reading from OpenRouter API: {e}")
return None, f"Fallback AI ({OPENROUTER_MODEL}) timed out."
except httpx.RequestError as e:
logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}")
return None, "Error connecting to fallback AI service."
except Exception as e:
logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter call: {e}", exc_info=True)
return None, "Unexpected error using fallback AI service."
async def generate_summary(text: str, summary_type: str) -> str:
"""Generates summary using Gemini (Primary) and falls back to OpenRouter if needed."""
global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL
logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})")
final_summary: Optional[str] = None; primary_error_message: Optional[str] = None
if _gemini_primary_enabled:
logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})")
final_summary, primary_error_message = await _call_gemini(text, summary_type)
if final_summary: logger.info("[Summary Generation] Success with primary AI (Gemini)."); return final_summary
else: logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {primary_error_message}. Proceeding to fallback.")
else: logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback."); primary_error_message = "Primary AI (Gemini) unavailable."
if _openrouter_fallback_enabled:
logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})")
fallback_summary, fallback_error_message = await _call_openrouter(text, summary_type)
if fallback_summary: logger.info("[Summary Generation] Success with fallback AI (OpenRouter)."); return fallback_summary
else:
logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error_message}")
# Construct a combined error message
primary_err = primary_error_message or "Primary AI unavailable"
fallback_err = fallback_error_message or "Fallback AI failed with unknown error"
return f"Sorry, summarization failed.\nPrimary: {primary_err}\nFallback ({OPENROUTER_MODEL}): {fallback_err}"
else:
logger.error("[Summary Generation] Fallback AI (OpenRouter) disabled. Cannot proceed.")
if primary_error_message: return f"{primary_error_message} Fallback AI is also unavailable."
else: return "Error: Both primary and fallback AI services are unavailable."
# This line should technically not be reached if logic is sound
logger.error("[Summary Generation] Reached end of function unexpectedly.")
return "Sorry, unknown error during summary generation."
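# Callers treat any return value starting with "Error:" or "Sorry," as a failure message
# rather than a summary (see the check in process_summary_task below).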
# --- Main Processing Logic (MODIFIED with Crawl4AI and re-ordered fallbacks) ---
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
"""Handles the entire process: fetching content (Crawl4AI -> Fallbacks) and summarizing."""
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
try:
# Use longer timeouts for the background bot to handle potentially long scrapes/summaries
background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=240.0, write_timeout=60.0, pool_timeout=240.0 )
bot = Bot(token=bot_token, request=background_request)
except Exception as e:
logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True)
# We cannot proceed without a bot instance
return
content: Optional[str] = None
user_feedback_message: Optional[str] = None
success: bool = False
# Use the original button message ID if available, otherwise we'll send a new one
status_message_id: Optional[int] = message_id_to_edit
# Keep track if we sent a *new* message that needs deleting (vs editing the button message)
new_status_message_id : Optional[int] = None
try:
# --- 1. Initial User Feedback ---
processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nFetching content (using primary method... this might take a minute)..."
if status_message_id:
try:
# Edit the message containing the buttons
await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None )
logger.debug(f"[Task {task_id}] Edited button message {status_message_id} to 'Processing'")
except (BadRequest, TelegramError) as e:
# Common errors: message not modified, message to edit not found, query too old
logger.warning(f"[Task {task_id}] Could not edit original button message {status_message_id}: {e}. Will send a new status message.")
status_message_id = None # Ensure we send a new message if edit fails
except Exception as e:
logger.error(f"[Task {task_id}] Unexpected error editing button message {status_message_id}: {e}. Will send new.", exc_info=True)
status_message_id = None
# If we couldn't edit the original message, send a new one
if not status_message_id:
try:
status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN )
if status_message:
new_status_message_id = status_message.message_id
logger.debug(f"[Task {task_id}] Sent new status message {new_status_message_id}")
else:
# This should ideally not happen due to retry_bot_operation, but handle defensively
raise RuntimeError("Failed to send new status message after retries.")
except Exception as e:
# If we can't even send a status message, we can't proceed meaningfully
logger.error(f"[Task {task_id}] CRITICAL: Failed to send initial status message: {e}. Aborting task.", exc_info=True)
# Attempt to clean up the original button message if it exists
if message_id_to_edit:
try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=message_id_to_edit)
except Exception: pass
raise # Re-raise to be caught by outer try/finally
# Determine which message ID to update/delete later
message_to_update_id = new_status_message_id or status_message_id
try:
# --- 2. Content Fetching (Chain of methods) ---
# Send typing indicator
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
is_youtube = is_youtube_url(url); logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
if is_youtube:
# --- YouTube Transcript Logic (Unchanged from original) ---
video_id = extract_youtube_id(url)
if video_id:
content = await get_youtube_transcript(video_id, url)
else:
user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
# Set feedback message if transcript fetch failed
if not content and not user_feedback_message:
user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
else:
# --- Website Scraping Logic (NEW Order: Crawl4AI -> Fallbacks) ---
global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN
global _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists, _crawl4ai_primary_scrape_enabled
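# Website fetch order (first method that returns usable content wins):
#   0) Crawl4AI (primary)             1) Direct fetch + BeautifulSoup    2) urltotext.com API
#   3) Scraper's Proxy (RapidAPI)     4) AI Web Scraper (RapidAPI)
#   5) Apify Website Content Crawler  6) Apify Text Scraper Free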
# Method 0: Primary Scrape (Crawl4AI)
logger.info(f"[Task {task_id}] Trying Web Scrape Method 0 (Primary: Crawl4AI)...")
if _crawl4ai_primary_scrape_enabled:
content = await get_website_content_via_crawl4ai(url)
if content:
logger.info(f"[Task {task_id}] Method 0 (Crawl4AI) succeeded.")
else:
logger.warning(f"[Task {task_id}] Method 0 (Crawl4AI) failed or returned insufficient content.")
# Edit status message to indicate fallback attempt
if message_to_update_id:
try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=message_to_update_id, text="Primary scrape method failed, trying fallbacks...", parse_mode=ParseMode.MARKDOWN)
except Exception: pass # Ignore if edit fails
else:
logger.warning(f"[Task {task_id}] Method 0 (Crawl4AI) skipped - library/driver unavailable.")
# Edit status message
if message_to_update_id:
try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=message_to_update_id, text="Primary scrape method unavailable, trying fallbacks...", parse_mode=ParseMode.MARKDOWN)
except Exception: pass
# Method 1: Fallback 1 (Direct Fetch + BS4)
if not content:
logger.warning(f"[Task {task_id}] Method 0 failed/skipped. Trying Method 1 (Direct Fetch + BS4)...")
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
content = await get_website_content_direct_bs4(url)
if not content: logger.warning(f"[Task {task_id}] Method 1 (Direct Fetch + BS4) failed.")
# Method 2: Fallback 2 (urltotext.com)
if not content:
logger.warning(f"[Task {task_id}] Method 1 failed. Trying Method 2 (urltotext.com)...")
if _urltotext_key_exists:
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
if not content: logger.warning(f"[Task {task_id}] Method 2 (urltotext.com) failed.")
else: logger.warning(f"[Task {task_id}] Method 2 (urltotext.com) API key unavailable. Skipping.")
# Method 3: Fallback 3 (Scraper's Proxy via RapidAPI)
if not content:
logger.warning(f"[Task {task_id}] Method 2 failed. Trying Method 3 (Scraper's Proxy)...")
if _rapidapi_key_exists:
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
content = await get_website_content_via_scrapers_proxy(url, RAPIDAPI_KEY)
if not content: logger.warning(f"[Task {task_id}] Method 3 (Scraper's Proxy) failed.")
else: logger.warning(f"[Task {task_id}] Method 3 (Scraper's Proxy) RapidAPI key unavailable. Skipping.")
# Method 4: Fallback 4 (AI Web Scraper via RapidAPI)
if not content:
logger.warning(f"[Task {task_id}] Method 3 failed. Trying Method 4 (AI Web Scraper)...")
if _rapidapi_key_exists:
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
content = await get_website_content_via_ai_web_scraper(url, RAPIDAPI_KEY)
if not content: logger.warning(f"[Task {task_id}] Method 4 (AI Web Scraper) failed.")
else: logger.warning(f"[Task {task_id}] Method 4 (AI Web Scraper) RapidAPI key unavailable. Skipping.")
# Method 5: Fallback 5 (Apify Website Content Crawler)
if not content:
logger.warning(f"[Task {task_id}] Method 4 failed. Trying Method 5 (Apify Crawler)...")
if _apify_token_exists:
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
content = await get_website_content_via_apify_crawler(url, APIFY_API_TOKEN)
if not content: logger.warning(f"[Task {task_id}] Method 5 (Apify Crawler) failed.")
else: logger.warning(f"[Task {task_id}] Method 5 (Apify Crawler) APIFY_API_TOKEN unavailable. Skipping.")
# Method 6: Fallback 6 (Apify Text Scraper Free)
if not content:
logger.warning(f"[Task {task_id}] Method 5 failed. Trying Method 6 (Apify Text Scraper)...")
if _apify_token_exists:
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
content = await get_website_content_via_apify_text_scraper(url, APIFY_API_TOKEN)
if not content: logger.warning(f"[Task {task_id}] Method 6 (Apify Text Scraper) failed.")
else: logger.warning(f"[Task {task_id}] Method 6 (Apify Text Scraper) APIFY_API_TOKEN unavailable. Skipping.")
# Final check if all website methods failed
if not content and not user_feedback_message:
logger.error(f"[Task {task_id}] All web scraping methods failed for {url}.")
user_feedback_message = "Sorry, I couldn't fetch readable content from that website using multiple methods (blocked/dynamic content/empty?). Even the advanced crawler failed."
# --- 3. Summarization ---
if content:
logger.info(f"[Task {task_id}] Content fetched successfully (len:{len(content)}). Generating '{summary_type}' summary.")
# Update status message before starting potentially long summary generation
if message_to_update_id:
try:
await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=message_to_update_id, text=f"Content fetched! Now generating '{summary_type}' summary with AI...", parse_mode=ParseMode.MARKDOWN, reply_markup=None )
except Exception as edit_e:
logger.warning(f"[Task {task_id}] Failed to edit status message before summary generation: {edit_e}")
# Send typing indicator again for summary generation
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
final_summary = await generate_summary(content, summary_type)
# Check if summary generation itself returned an error message
if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
user_feedback_message = final_summary # Use the error message from generate_summary
logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
else:
# Summary successful, send it (potentially in parts)
max_length = 4096 # Telegram message length limit
if len(final_summary) <= max_length:
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=final_summary, parse_mode=None, link_preview_options={'is_disabled': True} )
else:
# Split into parts
summary_parts = []
current_part = ""
for line in final_summary.splitlines(keepends=True):
if len(current_part) + len(line) > max_length:
summary_parts.append(current_part)
current_part = line
else:
current_part += line
if current_part: # Add the last part
summary_parts.append(current_part)
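# Note: this simple line-based split assumes no single line exceeds max_length;
# an individual line longer than 4096 characters would still fail to send.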
logger.info(f"[Task {task_id}] Summary too long ({len(final_summary)} chars), splitting into {len(summary_parts)} parts.")
for i, part in enumerate(summary_parts):
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} )
if i < len(summary_parts) - 1:
await asyncio.sleep(0.7) # Short delay between parts
success = True
logger.info(f"[Task {task_id}] Successfully sent summary.")
user_feedback_message = None # Clear any previous potential error message
# --- 4. Handle Final Failure Feedback ---
# If we have a user_feedback_message set at this point, it means something failed
# (either content fetching or summarization)
if user_feedback_message:
logger.warning(f"[Task {task_id}] Process failed. Sending failure feedback: {user_feedback_message}")
# Send the failure message as a new message
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
except Exception as e:
# Catch any unexpected errors during the main processing block
logger.error(f"[Task {task_id}] Unexpected error during core processing: {e}", exc_info=True)
user_feedback_message = "Oops! Something went wrong while processing your request. Please try again later."
try:
# Try to send a generic error message
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
except Exception as feedback_err:
logger.error(f"[Task {task_id}] Failed even to send the generic error feedback message: {feedback_err}")
success = False # Ensure success is false
except Exception as outer_e:
# Catch critical errors (like failure to send initial status message)
logger.critical(f"[Task {task_id}] Critical outer error prevented task execution: {outer_e}", exc_info=True)
try:
if bot: # Check if bot was initialized
await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ A critical internal error occurred. I couldn't process your request." )
except Exception as crit_feedback_err:
logger.exception(f"[Task {task_id}] Failed even to send the critical error message: {crit_feedback_err}")
success = False # Ensure success is false
finally:
# --- 5. Cleanup ---
# Delete the status message we were updating (either the original button message or the new one we sent)
delete_target_id = new_status_message_id if new_status_message_id else status_message_id
if delete_target_id and bot:
try:
await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id)
logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
except (BadRequest, TelegramError) as del_e:
# Ignore errors like "message to delete not found"
if "not found" not in str(del_e).lower():
logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
except Exception as del_e:
logger.warning(f"[Task {task_id}] Unexpected error deleting status/button message {delete_target_id}: {del_e}")
# Close the background bot's HTTPX client if it was created
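# (On python-telegram-bot v20+, awaiting `background_request.shutdown()` may be a cleaner
# alternative to touching the private `_client` attribute directly.)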
if background_request and hasattr(background_request, '_client') and background_request._client:
try:
await background_request._client.aclose()
logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
except Exception as close_e:
logger.warning(f"[Task {task_id}] Error closing background bot's client: {close_e}")
logger.info(f"[Task {task_id}] Task finished. Overall Success: {success}")
# --- Telegram Handlers ---
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
user = update.effective_user
if not user or not update.message: return
mention = user.mention_html()
logger.info(f"User {user.id} ({user.username or 'no_username'}) used /start.")
await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" )
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
user = update.effective_user
if not user or not update.message: return
logger.info(f"User {user.id} ({user.username or 'no_username'}) used /help.")
help_text = ( "📖 **How to use this bot:**\n\n"
"1. Send me any YouTube video link or website URL.\n"
"2. I'll ask how you want it summarised (paragraph or points).\n"
"3. Click the button for your choice.\n"
"4. Wait while I fetch the content and generate the summary!\n\n"
"⚙️ **Website Scraping:** I use an advanced web crawler (`crawl4ai`) first. If that doesn't work, I'll try several fallback methods (direct fetch, APIs) to get the text.\n"
"📺 **YouTube:** I try the official library first, then fall back to APIs if needed.\n"
"🤖 **Summaries:** I use Google Gemini primarily, with OpenRouter as a backup.\n\n"
"**Commands:**\n"
"`/start` - Display the welcome message\n"
"`/help` - Show this help message" )
await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
if not update.message or not update.message.text: return
message_text = update.message.text.strip(); user = update.effective_user
if not user: return
# More robust URL extraction using regex - finds the first http(s) link
url_pattern = re.compile(r"https?://[^\s/$.?#].[^\s]*", re.IGNORECASE)
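# Illustrative match: "check this out https://example.com/article." captures
# "https://example.com/article."; the trailing punctuation is stripped just below.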
match = url_pattern.search(message_text)
if match:
extracted_url = match.group(0)
# Clean potential trailing characters like periods or parentheses if message contained more text
extracted_url = extracted_url.rstrip(').,')
logger.info(f"User {user.id} ({user.username or 'no_username'}) sent potential URL: {extracted_url}")
# Store URL and original message ID in user_data for the callback
context.user_data['url_to_summarize'] = extracted_url
context.user_data['original_message_id'] = update.message.message_id # Store original message ID if needed later
keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
reply_markup = InlineKeyboardMarkup(keyboard)
try:
# Reply to the original message
await update.message.reply_text(
f"Okay, I see this link:\n`{extracted_url}`\n\nHow would you like it summarised?",
reply_markup=reply_markup,
disable_web_page_preview=True,
parse_mode=ParseMode.MARKDOWN
)
except BadRequest as e:
if "chat not found" in str(e).lower() or "bot was blocked by the user" in str(e).lower():
logger.warning(f"Could not reply to user {user.id} (chat not found or blocked).")
else:
logger.error(f"BadRequest replying to URL message from {user.id}: {e}")
except Exception as e:
logger.error(f"Error replying to URL message from {user.id}: {e}", exc_info=True)
else:
# If the message filter passed but regex didn't find a URL, log it but don't reply
logger.debug(f"Ignoring message from {user.id} - Entity filter matched but no URL found by regex: {message_text[:100]}")
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
query = update.callback_query
if not query or not query.message or not query.from_user:
logger.warning("Callback query received without essential data.")
# Attempt to answer the query even if we can't process it, to remove the loading indicator
if query:
try: await query.answer("Error: Missing data.", show_alert=True)
except Exception: pass
return
user = query.from_user
summary_type = query.data
query_id = query.id
chat_id = query.message.chat_id
message_id_to_edit = query.message.message_id # This is the message with the buttons
try:
# Acknowledge the button press quickly
await query.answer()
logger.debug(f"Acknowledged callback {query_id} from {user.id} for summary type '{summary_type}'")
except BadRequest as e:
if "query is too old" in str(e).lower():
logger.warning(f"Callback query {query_id} is too old to answer. User might have double-clicked or waited too long.")
# Optionally edit the message to indicate the issue if possible
try: await query.edit_message_text(text="This request is too old. Please send the link again.", reply_markup=None)
except Exception: pass
return # Stop processing if the query is too old
else:
# Log other BadRequest errors but attempt to continue if acknowledging failed
logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
except Exception as e:
logger.error(f"Unexpected error answering callback {query_id}: {e}", exc_info=True)
# Attempt to continue processing even if answering failed
# Retrieve the URL stored in user_data
url = context.user_data.get('url_to_summarize')
logger.info(f"User {user.id} chose '{summary_type}' for button message {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
if not url:
logger.warning(f"No URL found in context for user {user.id} (callback query {query_id}). Button might be old or context lost.")
try:
# Edit the button message to inform the user
await query.edit_message_text(
text="Sorry, I couldn't find the original URL for this request (it might be too old or the bot restarted). Please send the link again.",
reply_markup=None # Remove buttons
)
except (BadRequest, TelegramError) as edit_e:
# Ignore errors like "message is not modified" or "message to edit not found"
if "not modified" not in str(edit_e).lower() and "not found" not in str(edit_e).lower():
logger.warning(f"Failed to edit 'URL not found' message {message_id_to_edit} for user {user.id}: {edit_e}")
except Exception as edit_e:
logger.warning(f"Error editing 'URL not found' message {message_id_to_edit} for user {user.id}: {edit_e}")
return # Stop processing if URL is missing
# Clear the URL from context once retrieved to prevent accidental reuse
context.user_data.pop('url_to_summarize', None)
context.user_data.pop('original_message_id', None) # Clear original message ID too
logger.debug(f"Cleared URL context for user {user.id}")
# --- Pre-task Checks ---
global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
if not TELEGRAM_TOKEN:
logger.critical("FATAL: TELEGRAM_TOKEN missing when trying to start background task!")
try: await query.edit_message_text(text="❌ Critical Bot Configuration Error (Missing Token). Cannot proceed.", reply_markup=None)
except Exception: pass
return
if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
logger.critical("FATAL: Neither Gemini nor OpenRouter API keys are configured/valid when trying to start background task!")
try: await query.edit_message_text(text="❌ Critical AI Configuration Error: No summarization models available. Cannot proceed.", reply_markup=None)
except Exception: pass
return
elif not _gemini_primary_enabled:
logger.warning("Primary AI (Gemini) unavailable, relying solely on fallback for this task.")
elif not _openrouter_fallback_enabled:
logger.warning("Fallback AI (OpenRouter) unavailable, relying solely on primary for this task.")
# --- Schedule Background Task ---
logger.info(f"Scheduling background task for user {user.id}, chat {chat_id}, button message {message_id_to_edit}, url: {url[:60]}...")
asyncio.create_task(
process_summary_task(
user_id=user.id,
chat_id=chat_id,
message_id_to_edit=message_id_to_edit, # Pass the button message ID
url=url,
summary_type=summary_type,
bot_token=TELEGRAM_TOKEN
),
# Name the task for easier debugging if needed
name=f"SummaryTask-{user.id}-{message_id_to_edit}"
)
# Note: The process_summary_task will handle editing/deleting the message_id_to_edit
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Log Errors caused by Updates."""
logger.error("Exception while handling an update:", exc_info=context.error)
# Optionally add more context if 'update' is an Update object
if isinstance(update, Update) and update.effective_chat:
logger.error(f"Error occurred in chat {update.effective_chat.id}")
# --- Application Setup & Web Framework ---
async def setup_bot_config() -> Application:
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
# Configure HTTPX request settings for the main PTB application
custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
# --- Add Handlers ---
application.add_handler(CommandHandler("start", start))
application.add_handler(CommandHandler("help", help_command))
# Use a filter that catches messages containing URL entities
url_filter = filters.Entity("url") | filters.Entity("text_link")
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND & url_filter, handle_potential_url))
# Handler for button clicks (summary type selection)
application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
# Error handler
application.add_error_handler(error_handler)
logger.info("Telegram application handlers configured."); return application
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
logger.info("ASGI Lifespan: Startup initiated...");
if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
bot_setup_successful = False
webhook_set = False
try:
ptb_app = await setup_bot_config()
await ptb_app.initialize()
bot_info = await ptb_app.bot.get_me()
logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
bot_setup_successful = True # Mark bot setup as successful here
# --- Webhook Setup ---
# Check and delete existing webhook first
current_webhook_info = await ptb_app.bot.get_webhook_info()
if current_webhook_info and current_webhook_info.url:
logger.info(f"Found existing webhook: {current_webhook_info.url}. Attempting to delete...")
try:
if await ptb_app.bot.delete_webhook(drop_pending_updates=True):
logger.info("Existing webhook deleted successfully.")
else:
# API returned False, might not be critical but worth noting
logger.warning("Attempt to delete existing webhook returned False from API.")
except Exception as e:
logger.warning(f"Could not delete existing webhook (Error: {e}). Proceeding with setting new webhook.", exc_info=True)
await asyncio.sleep(1) # Short delay after potential delete
# Determine webhook URL (assuming deployment provides SPACE_HOST)
space_host = os.environ.get("SPACE_HOST")
if not space_host:
logger.critical("SPACE_HOST environment variable not found. Cannot set webhook.")
raise RuntimeError("SPACE_HOST environment variable missing.")
webhook_path = "/webhook" # Matches the route defined later
# Ensure correct protocol and clean host formatting
protocol = "https"
host = space_host.split('://')[-1].rstrip('/') # Remove trailing slashes
full_webhook_url = f"{protocol}://{host}{webhook_path}"
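# Illustrative example: SPACE_HOST="my-space.hf.space" -> "https://my-space.hf.space/webhook"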
logger.info(f"Calculated webhook URL: {full_webhook_url}")
# Set the new webhook
set_webhook_args = {
"url": full_webhook_url,
"allowed_updates": Update.ALL_TYPES, # Receive all update types
"drop_pending_updates": True # Ignore updates while bot was down
}
if WEBHOOK_SECRET:
set_webhook_args["secret_token"] = WEBHOOK_SECRET
logger.info("Webhook secret token will be used.")
else:
logger.info("No webhook secret token configured.")
# Give network/DNS a moment before setting
await asyncio.sleep(1.5)
logger.info(f"Attempting to set webhook to: {full_webhook_url} with args: {set_webhook_args}")
await ptb_app.bot.set_webhook(**set_webhook_args)
# Verify webhook setup
await asyncio.sleep(1) # Allow time for info propagation
new_webhook_info = await ptb_app.bot.get_webhook_info()
if new_webhook_info.url == full_webhook_url:
logger.info(f"Webhook successfully set: URL='{new_webhook_info.url}', Secret Token Set={bool(WEBHOOK_SECRET)}")
webhook_set = True
else:
logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', but GET response shows '{new_webhook_info.url}'. Check firewall/proxy/platform settings.")
# Decide whether to raise an error or try to continue
# For now, let's raise an error as webhook is critical
raise RuntimeError("Failed to verify webhook URL after setting.")
# Start the PTB application processing
await ptb_app.start()
logger.info("PTB Application started; processing updates received via webhook.")
logger.info("ASGI Lifespan: Startup complete."); yield # Application runs here
except Exception as startup_err:
logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
# Attempt cleanup even if startup failed partially
if ptb_app and bot_setup_successful:
if ptb_app.running:
try: await ptb_app.stop()
except Exception as stop_err: logger.error(f"Error stopping PTB app during failed startup: {stop_err}")
# Try to delete webhook if it was potentially set
if webhook_set:
try:
logger.info("Attempting to delete webhook due to startup failure...")
await ptb_app.bot.delete_webhook(drop_pending_updates=True)
logger.info("Webhook deleted during failed startup cleanup.")
except Exception as del_wh_err: logger.error(f"Failed to delete webhook during failed startup cleanup: {del_wh_err}")
try: await ptb_app.shutdown()
except Exception as shutdown_err: logger.error(f"Error shutting down PTB app during failed startup: {shutdown_err}")
raise # Re-raise the original startup error
finally:
# --- Shutdown Logic ---
logger.info("ASGI Lifespan: Shutdown initiated...")
if ptb_app and bot_setup_successful:
# Stop PTB app first
if ptb_app.running:
logger.info("Stopping PTB Application processing...")
try: await ptb_app.stop()
except Exception as e: logger.error(f"Error stopping PTB application: {e}")
else: logger.info("PTB Application was not running.")
# Delete webhook before shutting down fully
try:
logger.info("Attempting to delete webhook on shutdown...")
if ptb_app.bot and hasattr(ptb_app.bot, 'delete_webhook'):
# Check if webhook is actually set before trying to delete
current_wh_info = await ptb_app.bot.get_webhook_info()
if current_wh_info and current_wh_info.url:
if await ptb_app.bot.delete_webhook(drop_pending_updates=True):
logger.info("Webhook deleted successfully on shutdown.")
else:
logger.warning("Failed to delete webhook on shutdown (API returned False).")
else:
logger.info("No webhook was set, skipping deletion.")
else:
logger.warning("Cannot delete webhook: Bot object unavailable or doesn't support delete_webhook.")
except Exception as e:
logger.warning(f"Could not delete webhook during shutdown: {e}", exc_info=False)
# Shutdown PTB application resources
logger.info("Shutting down PTB Application resources...")
try: await ptb_app.shutdown()
except Exception as e: logger.error(f"Error during PTB application shutdown: {e}")
logger.info("PTB Application shut down.")
else:
logger.info("PTB app not fully initialized or setup failed. Skipping PTB shutdown steps.")
logger.info("ASGI Lifespan: Shutdown complete.")
async def health_check(request: Request) -> PlainTextResponse:
"""Simple health check endpoint."""
global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled
global _urltotext_key_exists, _rapidapi_key_exists, SUPADATA_API_KEY, _crawl4ai_primary_scrape_enabled
bot_status = "Not Initialized"; bot_username = "N/A"
if ptb_app and ptb_app.bot:
try:
# Check if the application is running (processing updates)
app_running = ptb_app.running
# Try to get bot info regardless of running state if bot object exists
bot_info = await ptb_app.bot.get_me()
bot_username = f"@{bot_info.username}" if bot_info and bot_info.username else "Info Fetch Error"
if app_running:
bot_status = "Running"
else:
# If initialized but not running (e.g., during startup/shutdown)
bot_status = "Initialized (Not Processing Updates)"
except (TimedOut, NetworkError) as net_err:
bot_status = f"Network Error checking status: {type(net_err).__name__}"
bot_username = "N/A (Network Error)"
logger.warning(f"Health check: Network error getting bot info: {net_err}")
except Exception as e:
bot_status = f"Error checking status: {type(e).__name__}"
bot_username = "N/A (Error)"
logger.warning(f"Health check: Error getting bot info: {e}", exc_info=False)
elif ptb_app:
bot_status = "Initialized (Bot object missing?)"
bot_username = "N/A"
else:
bot_status = "Not Initialized"
bot_username = "N/A"
# Construct the response string
response_lines = [
f"TG Bot Summariser - Status: {bot_status} ({bot_username})",
"--- Summarization ---",
f"Primary Model (Gemini): {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}",
f"Fallback Model (OpenRouter): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}",
"--- YouTube Transcripts ---",
"Primary (Lib): Enabled",
f"Fallback 1 (Supadata): {'Enabled' if SUPADATA_API_KEY else 'Disabled (Key Missing)'}",
f"Fallback 2 (Apify Actor): {APIFY_ACTOR_ID if _apify_token_exists else 'DISABLED (Token Missing)'}",
"--- Website Scraping ---",
f"Primary (Crawl4AI): {'Enabled' if _crawl4ai_primary_scrape_enabled else 'DISABLED (Library/Driver Missing?)'}",
"Fallback 1 (Direct+BS4): Enabled",
f"Fallback 2 (urltotext): {'Enabled' if _urltotext_key_exists else 'Disabled (Key Missing)'}",
f"Fallback 3/4 (RapidAPI): {'Enabled' if _rapidapi_key_exists else 'Disabled (Key Missing)'}",
f"Fallback 5/6 (Apify Actors): {'Enabled' if _apify_token_exists else 'Disabled (Token Missing)'}"
]
return PlainTextResponse("\n".join(response_lines))
async def telegram_webhook(request: Request) -> Response:
"""Handles incoming updates from Telegram."""
global ptb_app, WEBHOOK_SECRET # Ensure ptb_app is accessible
# --- Basic Checks ---
if not ptb_app:
logger.error("Webhook received but PTB application is not initialized.")
return PlainTextResponse('Bot application not initialized', status_code=503) # Service Unavailable
if not ptb_app.bot:
logger.error("Webhook received but PTB bot object is not available.")
return PlainTextResponse('Bot object not available', status_code=503)
if not ptb_app.running:
logger.warning("Webhook received but PTB application is not running (likely startup/shutdown).")
# Return 200 OK to Telegram to prevent retries, but log the warning.
return PlainTextResponse('Bot not actively processing', status_code=200)
# --- Security Check (Secret Token) ---
if WEBHOOK_SECRET:
token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
if not token_header:
logger.warning("Webhook received request MISSING secret token header, but one is configured.")
return Response(content="Forbidden: Missing secret token", status_code=403)
if token_header != WEBHOOK_SECRET:
logger.warning(f"Webhook received INVALID secret token. Header: '{token_header[:5]}...'")
return Response(content="Forbidden: Invalid secret token", status_code=403)
# If token matches, proceed
# --- Process Update ---
try:
update_data = await request.json()
update = Update.de_json(data=update_data, bot=ptb_app.bot)
logger.debug(f"Processing update_id: {update.update_id} via webhook")
# Use PTB's built-in update processing queue
await ptb_app.process_update(update)
# Return 200 OK to Telegram quickly after queuing the update
return Response(status_code=200)
except json.JSONDecodeError:
logger.error("Webhook received invalid JSON data.")
return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
except Exception as e:
# Log the error, but return 200 OK to prevent Telegram from resending the faulty update
logger.error(f"Error processing webhook update: {e}", exc_info=True)
return Response(status_code=200)
# --- Starlette App Definition ---
app = Starlette(
debug=False, # Set to False for production
lifespan=lifespan,
routes=[
Route("/", endpoint=health_check, methods=["GET"]),
Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
]
)
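# In production this ASGI app is expected to be served by an external ASGI server, e.g.
# (illustrative; adjust the module name to this file's actual name):
#   uvicorn app:app --host 0.0.0.0 --port 8080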
logger.info("Starlette ASGI application created with health check ('/') and Telegram webhook ('/webhook') routes.")
# --- Development Server & Playwright Check ---
if __name__ == '__main__':
import uvicorn
logger.warning("Running in development mode using Uvicorn directly - NOT recommended for production!")
# Check for Playwright installation on startup in dev mode
playwright_installed = False
try:
from playwright.async_api import async_playwright
playwright_installed = True
logger.info("Playwright library found.")
# Optional: Add playwright install command here if needed for dev
# Consider running `playwright install --with-deps` manually in your dev env
except ImportError:
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
logger.critical("Playwright library not found. Crawl4AI (Primary Scraper) WILL FAIL.")
logger.critical("Install it: pip install playwright")
logger.critical("Then install browsers: playwright install --with-deps")
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
# Check Crawl4AI explicitly
if not _crawl4ai_available:
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
logger.critical("Crawl4AI library not found. Primary Scraper WILL BE DISABLED.")
logger.critical("Install it: pip install crawl4ai")
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
elif not playwright_installed:
logger.warning("Crawl4AI is installed, but Playwright is missing. Crawl4AI will likely fail without Playwright drivers.")
# Get log level and port from environment or use defaults
log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
local_port = int(os.environ.get('PORT', 8080)) # Use PORT env var, default 8080
# Run Uvicorn
uvicorn.run(
"__main__:app",
host='0.0.0.0', # Listen on all interfaces
port=local_port,
log_level=log_level,
reload=True # Enable auto-reload for development
) |