# main.py (Corrected PermissionError and Integrated Crawl4AI as Primary) | |
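#
# Content pipeline implemented in this file:
#   * Web pages:  Crawl4AI (primary) -> direct fetch + BeautifulSoup -> urltotext.com
#                 -> Scraper's Proxy (RapidAPI) -> AI Web Scraper (RapidAPI)
#                 -> Apify Website Content Crawler -> Apify Text Scraper
#   * YouTube:    youtube-transcript-api (primary) -> Supadata -> Apify actor
#   * Summaries:  Google Gemini (primary) -> OpenRouter (fallback)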
import os | |
import re | |
import logging | |
import asyncio | |
import json | |
import html | |
import contextlib | |
import traceback | |
import urllib.parse # Added for URL encoding | |
from typing import Optional, Dict, Any, Tuple, Union # Added Union | |
# --- Frameworks --- | |
from starlette.applications import Starlette | |
from starlette.routing import Route | |
from starlette.responses import PlainTextResponse, JSONResponse, Response | |
from starlette.requests import Request | |
# --- Telegram Bot --- | |
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot | |
from telegram.ext import ( | |
Application, | |
CommandHandler, | |
MessageHandler, | |
filters, | |
ContextTypes, | |
CallbackQueryHandler, | |
) | |
from telegram.constants import ParseMode | |
from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError | |
from telegram.request import HTTPXRequest, BaseRequest | |
# --- Other Libraries --- | |
import httpx | |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound | |
from bs4 import BeautifulSoup | |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log | |
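# Prefer lxml for faster, more forgiving HTML parsing when it is installed; otherwise fall back to the stdlib parser.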
try: | |
import lxml | |
DEFAULT_PARSER = 'lxml' | |
except ImportError: | |
DEFAULT_PARSER = 'html.parser' | |
# --- Google Gemini --- | |
try: | |
import google.generativeai as genai | |
from google.generativeai.types import HarmCategory, HarmBlockThreshold | |
_gemini_available = True | |
except ImportError: | |
genai = None | |
HarmCategory = None | |
HarmBlockThreshold = None | |
_gemini_available = False | |
# logger will be defined later, log warning after logger setup | |
# --- Crawl4AI (NEW Primary Scraper) --- | |
try: | |
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, BrowserConfig, CacheMode, CrawlResult | |
from crawl4ai.models import MarkdownGenerationResult # Specific import for type hint | |
_crawl4ai_available = True | |
except ImportError: | |
AsyncWebCrawler = None | |
CrawlerRunConfig = None | |
BrowserConfig = None | |
CacheMode = None | |
CrawlResult = None | |
MarkdownGenerationResult = None # Corrected typo | |
_crawl4ai_available = False | |
# logger will be defined later, log warning after logger setup | |
# --- Logging Setup --- | |
logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO ) | |
logging.getLogger("httpx").setLevel(logging.WARNING) | |
logging.getLogger("telegram.ext").setLevel(logging.INFO) | |
logging.getLogger('telegram.bot').setLevel(logging.INFO) | |
logging.getLogger("urllib3").setLevel(logging.INFO) | |
logging.getLogger('gunicorn.error').setLevel(logging.INFO) | |
logging.getLogger('uvicorn').setLevel(logging.INFO) | |
logging.getLogger('starlette').setLevel(logging.INFO) | |
if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING) | |
# Keep C4AI logs less verbose unless debugging | |
if _crawl4ai_available: logging.getLogger("crawl4ai").setLevel(logging.WARNING) | |
logger = logging.getLogger(__name__) | |
logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}") | |
if not _gemini_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.") | |
if not _crawl4ai_available: logger.warning("crawl4ai library not found. Primary Web Scraping (Crawl4AI) disabled.") | |
# --- Global variable for PTB app --- | |
ptb_app: Optional[Application] = None | |
# --- Define a writable base directory for Crawl4AI --- | |
# Use /app which is the WORKDIR in the Dockerfile and is generally writable | |
CRAWL4AI_BASE_DIR = "/app/.crawl4ai_cache" | |
if _crawl4ai_available: | |
try: | |
os.makedirs(CRAWL4AI_BASE_DIR, exist_ok=True) | |
logger.info(f"Ensured Crawl4AI base directory exists and is writable: {CRAWL4AI_BASE_DIR}") | |
except Exception as e: | |
# Log the error but proceed; Crawl4AI may still work without cache/DB features.
logger.error(f"Could not create Crawl4AI base directory {CRAWL4AI_BASE_DIR}: {e}. Crawl4AI caching/DB features might fail.") | |
# --- Environment Variable Loading & Configuration --- | |
logger.info("Attempting to load secrets and configuration...") | |
def get_secret(secret_name): | |
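    """Reads a secret from the environment and logs whether it was found (only the first few characters of the value are logged)."""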
value = os.environ.get(secret_name) | |
if value: status = "Found"; log_length = min(len(value), 8); value_start = value[:log_length]; logger.info(f"Secret '{secret_name}': {status} (Value starts with: {value_start}...)") | |
else: status = "Not Found"; logger.warning(f"Secret '{secret_name}': {status}") | |
return value | |
TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN') | |
OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Summarizer Fallback | |
URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # Scrape Fallback 2 (WAS 1) | |
SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY') # YT Fallback 1 | |
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # YT Fallback 2 + Scrape Fallbacks 5 & 6 (WAS 4 & 5) | |
RAPIDAPI_KEY = get_secret('RAPIDAPI_KEY') # Scrape Fallbacks 3 & 4 (WAS 2 & 3) | |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET') | |
GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Primary Summarizer | |
# Models (User can still configure via env vars) | |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-coder-33b-instruct") # Fallback Model | |
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Default YT Actor | |
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash-latest") # Primary Model | |
# Specific Actor IDs for Website Scraping Fallbacks | |
APIFY_CRAWLER_ACTOR_ID = "apify/website-content-crawler" # Fallback 5 (WAS 4) | |
APIFY_TEXT_SCRAPER_ACTOR_ID = "karamelo/text-scraper-free" # Fallback 6 (WAS 5) | |
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.") | |
if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.") | |
if not OPENROUTER_API_KEY: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback summarization will fail.") | |
if not RAPIDAPI_KEY: logger.warning("⚠️ WARNING: RAPIDAPI_KEY not found. RapidAPI scraping fallbacks (3 & 4) will be unavailable.") # Updated numbers | |
if not APIFY_API_TOKEN: logger.warning("⚠️ WARNING: APIFY_API_TOKEN not found. YT transcript fallback (2) and Website scraping fallbacks (5 & 6) will be unavailable.") # Updated numbers | |
_gemini_primary_enabled = _gemini_available and bool(GEMINI_API_KEY) | |
if not _gemini_available: logger.warning("⚠️ WARNING: google-generativeai library missing. Gemini disabled.") | |
elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found or empty. Gemini disabled.") | |
_openrouter_fallback_enabled = bool(OPENROUTER_API_KEY) | |
if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback disabled.") | |
_crawl4ai_primary_scrape_enabled = _crawl4ai_available # Check if library loaded | |
if not _crawl4ai_available: logger.error("❌ ERROR: crawl4ai library missing. Primary web scraping disabled. Will attempt fallbacks immediately.") | |
if not URLTOTEXT_API_KEY: logger.warning("Optional secret 'URLTOTEXT_API_KEY' not found. Web scraping fallback 2 unavailable.") # Updated number | |
if not SUPADATA_API_KEY: logger.warning("Optional secret 'SUPADATA_API_KEY' not found. YT transcript fallback 1 unavailable.") | |
# APIFY_API_TOKEN warning handled above | |
# RAPIDAPI_KEY warning handled above | |
if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.") | |
logger.info("Secret loading and configuration check finished.") | |
logger.info(f"Primary Web Scraper (Crawl4AI): {'ENABLED' if _crawl4ai_primary_scrape_enabled else 'DISABLED - Check Logs for Details'}") | |
logger.info(f"Using Gemini Model (Primary Summarizer): {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}") | |
logger.info(f"Using OpenRouter Model (Fallback Summarizer): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}") | |
logger.info(f"Using Apify Actor (YT Default): {APIFY_ACTOR_ID}") | |
logger.info(f"Using Apify Actor (Web Scrape Fallback 5): {APIFY_CRAWLER_ACTOR_ID}") | |
logger.info(f"Using Apify Actor (Web Scrape Fallback 6): {APIFY_TEXT_SCRAPER_ACTOR_ID}") | |
_apify_token_exists = bool(APIFY_API_TOKEN) | |
_urltotext_key_exists = bool(URLTOTEXT_API_KEY) | |
_rapidapi_key_exists = bool(RAPIDAPI_KEY) | |
if _gemini_primary_enabled: | |
try: genai.configure(api_key=GEMINI_API_KEY); logger.info("Google GenAI client configured successfully.") | |
except Exception as e: logger.error(f"Failed to configure Google GenAI client: {e}"); _gemini_primary_enabled = False | |
# --- Retry Decorator (Unchanged) --- | |
async def retry_bot_operation(func, *args, **kwargs): | |
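    """Awaits a bot API call, returning None for a known set of benign BadRequest errors; all other errors are logged and re-raised."""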
try: return await func(*args, **kwargs) | |
except BadRequest as e: | |
ignore_errors = [ "message is not modified", "query is too old", "message to edit not found", "chat not found", "bot was blocked by the user", ] | |
if any(err in str(e).lower() for err in ignore_errors): logger.warning(f"Ignoring non-critical BadRequest: {e}"); return None | |
logger.error(f"Potentially critical BadRequest: {e}"); raise | |
except TelegramError as e: logger.warning(f"TelegramError (will retry if applicable): {e}"); raise | |
except Exception as e: logger.error(f"Unexpected error during bot operation: {e}", exc_info=True); raise | |
# --- Helper Functions (Unchanged) --- | |
def is_youtube_url(url): | |
youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE) | |
match = youtube_regex.search(url); logger.debug(f"is_youtube_url '{url}': {bool(match)}"); return bool(match) | |
def extract_youtube_id(url): | |
youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE) | |
match = youtube_regex.search(url) | |
if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id | |
else: logger.warning(f"Could not extract YT ID from {url}"); return None | |
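# Illustrative example (hypothetical URL): extract_youtube_id("https://youtu.be/dQw4w9WgXcQ") -> "dQw4w9WgXcQ"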
# --- Content Fetching Functions --- | |
# --- YouTube Transcript Fetching (Unchanged) --- | |
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]: | |
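    """Fallback YT 1: Fetches a YouTube transcript via the Supadata API."""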
if not video_id: logger.error("[Supadata] No video_id provided"); return None | |
if not api_key: logger.error("[Supadata] API key missing."); return None | |
logger.info(f"[YT Fallback 1] Attempting fetch for video ID: {video_id} via Supadata") | |
api_endpoint = "https://api.supadata.ai/v1/youtube/transcript" | |
params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key} | |
try: | |
async with httpx.AsyncClient(timeout=30.0) as client: | |
response = await client.get(api_endpoint, headers=headers, params=params) | |
logger.debug(f"[Supadata] Status code {response.status_code} for {video_id}") | |
if response.status_code == 200: | |
try: | |
data = response.json() if response.text else None # Check if text exists before json decode | |
content = None | |
if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data") | |
if not content and response.text: content = response.text # Fallback to raw text if json parse fails or content key missing | |
if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip() | |
else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None | |
except json.JSONDecodeError: logger.warning(f"[Supadata] Received 200 but failed JSON decode for {video_id}. Using raw text if available. Response: {response.text[:200]}"); return response.text.strip() if response.text else None | |
except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None | |
elif response.status_code in [401, 403]: logger.error(f"[Supadata] Auth error ({response.status_code}). Check API key."); return None | |
elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None | |
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None | |
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None | |
except httpx.RequestError as e: | |
if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}") | |
else: logger.error(f"[Supadata] Request error for {video_id}: {e}") | |
return None | |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None | |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]: | |
"""Fallback YT 2: Fetches YouTube transcript using default Apify Actor.""" | |
global APIFY_ACTOR_ID # Uses the default YT actor ID | |
if not video_url: logger.error("[Apify YT] No video_url provided"); return None | |
if not api_token: logger.error("[Apify YT] API token missing."); return None | |
logger.info(f"[YT Fallback 2] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})") | |
sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items" | |
params = {"token": api_token} | |
payload = { "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5, "channelHandleBoolean": False, "channelNameBoolean": False, "datePublishedBoolean": False, "relativeDateTextBoolean": False, } | |
headers = {"Content-Type": "application/json"} | |
try: | |
async with httpx.AsyncClient(timeout=120.0) as client: | |
logger.debug(f"[Apify YT] POST Request to {sync_items_endpoint} for {video_url}") | |
response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload) | |
logger.debug(f"[Apify YT] Received status code {response.status_code} for {video_url}") | |
if response.status_code == 200: | |
try: | |
results = response.json() | |
if isinstance(results, list) and len(results) > 0: | |
item = results[0]; content = None | |
if "captions" in item and isinstance(item["captions"], str): content = item["captions"] | |
elif "text" in item and isinstance(item["text"], str): content = item["text"] | |
elif "transcript" in item and isinstance(item["transcript"], str): content = item["transcript"] | |
elif "captions" in item and isinstance(item["captions"], list): | |
if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text")) | |
elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"]) | |
if content and isinstance(content, str): logger.info(f"[Apify YT] Success via REST for {video_url}. Length: {len(content)}"); return content.strip() | |
else: logger.warning(f"[Apify YT] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None | |
else: logger.warning(f"[Apify YT] Actor success but dataset was empty for {video_url}. Response: {results}"); return None | |
except json.JSONDecodeError: logger.error(f"[Apify YT] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:200]}"); return None | |
except Exception as e: logger.error(f"[Apify YT] Error processing success response for {video_url}: {e}", exc_info=True); return None | |
elif response.status_code == 400: logger.error(f"[Apify YT] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None | |
elif response.status_code == 401: logger.error("[Apify YT] Auth error (401). Check token."); return None | |
elif response.status_code == 404: logger.error(f"[Apify YT] Endpoint/Actor Not Found (404). Actor: {APIFY_ACTOR_ID} Resp:{response.text[:200]}"); return None | |
else: logger.error(f"[Apify YT] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None | |
except httpx.TimeoutException as e: logger.error(f"[Apify YT] Timeout during API interaction for {video_url}: {e}"); return None | |
except httpx.HTTPStatusError as e: logger.error(f"[Apify YT] HTTP Status Error during API interaction for {video_url}: {e}"); return None | |
except httpx.RequestError as e: logger.error(f"[Apify YT] Request error during API interaction for {video_url}: {e}"); return None | |
except Exception as e: logger.error(f"[Apify YT] Unexpected error during Apify YT call for {video_url}: {e}", exc_info=True); return None | |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]: | |
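    """Fetches a YouTube transcript, trying youtube-transcript-api first, then Supadata, then the default Apify actor."""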
global SUPADATA_API_KEY, APIFY_API_TOKEN, _apify_token_exists | |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None | |
logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})") | |
transcript_text = None | |
logger.info("[Primary YT] Attempting youtube-transcript-api...") | |
try: | |
transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] ) | |
if transcript_list: transcript_text = " ".join([item['text'] for item in transcript_list if 'text' in item]) | |
if transcript_text: logger.info(f"[Primary YT] Success via lib for {video_id} (len: {len(transcript_text)})"); return transcript_text | |
else: logger.warning(f"[Primary YT] Transcript list/text empty for {video_id}"); transcript_text = None | |
except NoTranscriptFound: logger.warning(f"[Primary YT] No transcript found via lib for {video_id}.") | |
except TranscriptsDisabled: logger.warning(f"[Primary YT] Transcripts disabled via lib for {video_id}.") | |
except Exception as e: logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}"); transcript_text = None | |
if transcript_text is None: | |
logger.info("[Fallback YT 1] Trying Supadata API...") | |
if SUPADATA_API_KEY: | |
transcript_text = await get_transcript_via_supadata(video_id, SUPADATA_API_KEY) | |
if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text | |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.") | |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.") | |
if transcript_text is None: | |
logger.info("[Fallback YT 2] Trying Apify REST API (Default YT Actor)...") | |
if _apify_token_exists: | |
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN) | |
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify Default YT Actor for {video_url}"); return transcript_text | |
else: logger.warning(f"[Fallback YT 2] Apify Default YT Actor failed or no content for {video_url}.") | |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.") | |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None | |
return transcript_text | |
# --- Website Content Fetching (MODIFIED SECTION) --- | |
# --- Method 0: Primary Web Scrape (Crawl4AI) --- | |
async def get_website_content_via_crawl4ai(url: str) -> Optional[str]: | |
"""Primary Web Method: Fetches and extracts content using Crawl4AI.""" | |
global _crawl4ai_primary_scrape_enabled, CRAWL4AI_BASE_DIR # Use the defined base dir | |
if not _crawl4ai_primary_scrape_enabled: | |
logger.warning("[Web Scrape Primary] Crawl4AI called but library/driver is unavailable.") | |
return None | |
if not url: logger.error("[Web Scrape Primary] Crawl4AI: No URL provided"); return None | |
logger.info(f"[Web Scrape Primary] Attempting fetch and extraction via Crawl4AI for: {url}") | |
# Configure the crawl run - enable cache now | |
run_config = CrawlerRunConfig( | |
cache_mode=CacheMode.ENABLED, # Use cache now that base_dir is set | |
page_timeout=60000, # 60 sec timeout | |
verbose=False, # Keep logs cleaner | |
scan_full_page=True, # Try to load dynamic content by scrolling | |
remove_overlay_elements=True, # Try to remove cookie banners/popups | |
# Consider adding markdown generation strategy if needed later | |
# from crawl4ai.content_filter_strategy import PruningContentFilter | |
# from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator | |
# md_generator = DefaultMarkdownGenerator(content_filter=PruningContentFilter()) | |
# markdown_generator=md_generator, | |
) | |
# BrowserConfig defaults are usually okay (headless chromium) | |
# browser_config = BrowserConfig(headless=True, verbose=False) | |
extracted_text: Optional[str] = None | |
try: | |
# Use context manager and provide base_directory to fix PermissionError | |
# Pass browser_config if needed: AsyncWebCrawler(config=browser_config, base_directory=CRAWL4AI_BASE_DIR) | |
async with AsyncWebCrawler(base_directory=CRAWL4AI_BASE_DIR) as crawler: | |
logger.debug(f"[Web Scrape Primary] Calling Crawl4AI crawler.arun for {url}") | |
result: CrawlResult = await crawler.arun(url=url, config=run_config) | |
logger.debug(f"[Web Scrape Primary] Crawl4AI arun completed. Success: {result.success}, Status: {result.status_code}") | |
if result.success: | |
# Check for markdown generation result first (preferred) | |
if result.markdown and isinstance(result.markdown, MarkdownGenerationResult): | |
# Prioritize 'fit_markdown' if available and substantial | |
if result.markdown.fit_markdown and isinstance(result.markdown.fit_markdown, str) and len(result.markdown.fit_markdown.strip()) > 50: | |
extracted_text = result.markdown.fit_markdown.strip() | |
logger.debug(f"[Web Scrape Primary] Using 'fit_markdown' from MarkdownGenerationResult for {url}") | |
# Fallback to 'raw_markdown' if 'fit_markdown' is missing/short | |
elif result.markdown.raw_markdown and isinstance(result.markdown.raw_markdown, str): | |
extracted_text = result.markdown.raw_markdown.strip() | |
logger.debug(f"[Web Scrape Primary] Using 'raw_markdown' (fit_markdown unavailable/short) for {url}") | |
else: | |
logger.warning(f"[Web Scrape Primary] Markdown object present but no usable text content (fit/raw) for {url}. Trying cleaned_html.") | |
# Fall through to cleaned_html parsing if markdown is unusable | |
# Handle if result.markdown is just a string (older version compatibility?) | |
elif result.markdown and isinstance(result.markdown, str): | |
extracted_text = result.markdown.strip() | |
logger.debug(f"[Web Scrape Primary] Using direct result.markdown string for {url}") | |
# If no markdown or unusable markdown, try parsing cleaned_html | |
if not extracted_text and result.cleaned_html: | |
logger.warning(f"[Web Scrape Primary] No usable markdown found, parsing cleaned_html with BS4 for {url}") | |
try: | |
# Use a simple BS4 parse as a fallback within Crawl4AI's result | |
soup = BeautifulSoup(result.cleaned_html, DEFAULT_PARSER) | |
extracted_text = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip()) | |
except Exception as bs_err: | |
logger.error(f"[Web Scrape Primary] Error parsing Crawl4AI's cleaned_html with BS4 for {url}: {bs_err}") | |
extracted_text = None # Ensure it's None if parsing fails | |
# Final check on extracted text length | |
if extracted_text and len(extracted_text) > 50: # Check for meaningful content length | |
logger.info(f"[Web Scrape Primary] Success via Crawl4AI for {url}. Length: {len(extracted_text)}") | |
return extracted_text | |
else: | |
content_len = len(extracted_text) if extracted_text else 0 | |
logger.warning(f"[Web Scrape Primary] Crawl4AI success but extracted text too short or empty for {url}. Length: {content_len}. Will try fallbacks.") | |
return None # Return None to trigger fallbacks | |
else: | |
error_msg = result.error_message or f"Crawl failed (status code: {result.status_code})" | |
logger.error(f"[Web Scrape Primary] Crawl4AI failed for {url}. Error: {error_msg}. Will try fallbacks.") | |
return None # Return None to trigger fallbacks | |
except asyncio.TimeoutError: | |
logger.error(f"[Web Scrape Primary] Timeout error during Crawl4AI crawl for {url}. Will try fallbacks.") | |
return None | |
except ImportError as ie: | |
if "playwright" in str(ie).lower(): | |
logger.critical(f"[Web Scrape Primary] Playwright library missing or drivers not installed! Run 'pip install playwright && playwright install --with-deps'. Error: {ie}") | |
_crawl4ai_primary_scrape_enabled = False # Disable future attempts | |
else: | |
logger.error(f"[Web Scrape Primary] Unexpected ImportError during Crawl4AI execution for {url}: {ie}", exc_info=True) | |
return None # Return None to trigger fallbacks | |
except Exception as e: | |
# Catch potential Playwright errors about missing executables explicitly | |
if "playwright" in str(e).lower() and ("install" in str(e).lower() or "executable" in str(e).lower() or "path" in str(e).lower()): | |
logger.critical("[Web Scrape Primary] Playwright drivers likely missing! Run 'playwright install --with-deps' in your environment. Disabling Crawl4AI.") | |
_crawl4ai_primary_scrape_enabled = False # Disable future attempts | |
else: | |
logger.error(f"[Web Scrape Primary] Unexpected error during Crawl4AI execution for {url}: {e}", exc_info=True) | |
return None # Return None to trigger fallbacks | |
# --- Fallback 1: Direct Fetch + BS4 (Previously Primary) --- | |
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]: | |
"""Directly fetches URL content using httpx. (Fallback Web Method 1 - Fetching part)""" | |
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' } | |
try: | |
async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client: | |
logger.debug(f"[Web Scrape Fallback 1] Sending GET request to {url}") | |
response = await client.get(url) | |
logger.debug(f"[Web Scrape Fallback 1] Received response {response.status_code} from {url}") | |
response.raise_for_status() | |
content_type = response.headers.get('content-type', '').lower() | |
if 'html' not in content_type and 'xml' not in content_type: | |
logger.warning(f"[Web Scrape Fallback 1] Non-HTML/XML content type received from {url}: {content_type}") | |
if 'text/plain' in content_type: logger.info(f"[Web Scrape Fallback 1] Content type is text/plain for {url}, reading."); return response.text | |
return None | |
try: return response.text | |
except Exception as e: logger.error(f"[Web Scrape Fallback 1] Error decoding response text for {url}: {e}"); return None | |
except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape Fallback 1] HTTP error {e.response.status_code} fetching {url}: {e}") | |
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 1] Timeout error fetching {url}") | |
except httpx.TooManyRedirects: logger.error(f"[Web Scrape Fallback 1] Too many redirects fetching {url}") | |
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 1] Request error fetching {url}: {e}") | |
except Exception as e: logger.error(f"[Web Scrape Fallback 1] Unexpected error fetching {url}: {e}", exc_info=True) | |
return None | |
async def get_website_content_direct_bs4(url: str) -> Optional[str]: | |
"""Fallback 1: Fetches HTML directly and parses with BeautifulSoup.""" | |
if not url: logger.error("[Web Scrape Fallback 1] No URL provided"); return None | |
logger.info(f"[Web Scrape Fallback 1] Attempting direct fetch and parse for: {url}") | |
html_content = await fetch_url_content_for_scrape(url) | |
if not html_content: logger.warning(f"[Web Scrape Fallback 1] Direct fetch failed for {url}."); return None | |
try: | |
def parse_html(content: str) -> Optional[str]: | |
try: | |
soup = BeautifulSoup(content, DEFAULT_PARSER) | |
# More aggressive removal of potentially noisy tags | |
for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "picture", "source", "map", "area", "details", "dialog"]): | |
element.extract() | |
# Try common main content containers | |
main_content = soup.find('main') or soup.find('article') or soup.find(role='main') or soup.find(id=re.compile(r'content|main|body|post', re.I)) or soup.find(class_=re.compile(r'content|main|body|article|post|entry', re.I)) | |
target_element = main_content if main_content else soup.body | |
if not target_element: | |
logger.warning(f"[Web Scrape Fallback 1 Parse] Could not find body or main content candidates for {url}") | |
# Fallback: Get text from the whole soup if no specific container found | |
text_from_root = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip()) | |
if text_from_root and len(text_from_root) > 50: | |
logger.warning(f"[Web Scrape Fallback 1 Parse] Using text from root as fallback for {url}. Length: {len(text_from_root)}") | |
return text_from_root | |
return None # Really couldn't find anything useful | |
# Extract text from the chosen element (main_content or body) | |
lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()] | |
text = " ".join(lines) | |
# Check if the extracted text is meaningful | |
if not text or len(text) < 50: # Increased threshold slightly | |
logger.warning(f"[Web Scrape Fallback 1 Parse] Extracted text from target element too short or empty for {url}. Length: {len(text)}") | |
# As a final attempt, try getting text from the entire soup again | |
text_from_root_final = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip()) | |
if text_from_root_final and len(text_from_root_final) > 50: | |
logger.warning(f"[Web Scrape Fallback 1 Parse] Reverting to text from root as final attempt for {url}. Length: {len(text_from_root_final)}") | |
return text_from_root_final | |
return None # Give up if even root text is too short | |
return text # Return the text from the target element | |
except Exception as parse_e: | |
logger.error(f"[Web Scrape Fallback 1 Parse] BS4 parsing error for {url}: {parse_e}", exc_info=False) | |
return None | |
# Run parsing in a separate thread to avoid blocking asyncio loop | |
text_content = await asyncio.to_thread(parse_html, html_content) | |
if text_content: | |
logger.info(f"[Web Scrape Fallback 1] Success via direct fetch & parse for {url} (len: {len(text_content)})") | |
return text_content | |
else: | |
logger.warning(f"[Web Scrape Fallback 1] Parsing failed or yielded no meaningful content for {url}.") | |
return None | |
except Exception as e: | |
logger.error(f"[Web Scrape Fallback 1] Unexpected error during parsing phase for {url}: {e}", exc_info=True) | |
return None | |
# --- Fallback 2: urltotext.com API --- | |
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]: | |
"""Fallback 2: Fetches website content using urltotext.com API.""" | |
if not url: logger.error("[Web Scrape Fallback 2] No URL"); return None | |
if not api_key: logger.error("[Web Scrape Fallback 2] urltotext.com API key missing."); return None | |
logger.info(f"[Web Scrape Fallback 2] Attempting fetch for: {url} using urltotext.com API") | |
api_endpoint = "https://urltotext.com/api/v1/urltotext/" | |
payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False } | |
headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" } | |
try: | |
async with httpx.AsyncClient(timeout=45.0) as client: | |
logger.debug(f"[Web Scrape Fallback 2] Sending request to urltotext.com API for {url}") | |
response = await client.post(api_endpoint, headers=headers, json=payload) | |
logger.debug(f"[Web Scrape Fallback 2] Received status {response.status_code} from urltotext.com API for {url}") | |
if response.status_code == 200: | |
try: | |
data = response.json() | |
content = data.get("data", {}).get("content"); credits = data.get("credits_used", "N/A"); warning = data.get("data", {}).get("warning") | |
if warning: logger.warning(f"[Web Scrape Fallback 2] urltotext.com API Warning for {url}: {warning}") | |
if content and isinstance(content, str) and len(content.strip()) > 30: # Check length after stripping | |
logger.info(f"[Web Scrape Fallback 2] Success via urltotext.com API for {url}. Len: {len(content.strip())}. Credits: {credits}") | |
return content.strip() | |
else: | |
content_len = len(content.strip()) if content and isinstance(content, str) else 0 | |
logger.warning(f"[Web Scrape Fallback 2] urltotext.com API success but content empty/short for {url}. Len: {content_len}. Resp: {data}"); return None | |
except json.JSONDecodeError: logger.error(f"[Web Scrape Fallback 2] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None | |
except Exception as e: logger.error(f"[Web Scrape Fallback 2] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None | |
elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[Web Scrape Fallback 2] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None | |
else: logger.error(f"[Web Scrape Fallback 2] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None | |
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 2] Timeout connecting to urltotext.com API for {url}"); return None | |
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 2] Request error connecting to urltotext.com API for {url}: {e}"); return None | |
except Exception as e: logger.error(f"[Web Scrape Fallback 2] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None | |
# --- Fallback 3: Scraper's Proxy Parser via RapidAPI --- | |
async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Optional[str]: | |
"""Fallback 3: Fetches website content using Scraper's Proxy Parser via RapidAPI.""" | |
if not url: logger.error("[Web Scrape Fallback 3] No URL provided"); return None | |
if not api_key: logger.error("[Web Scrape Fallback 3] RapidAPI key missing."); return None | |
logger.info(f"[Web Scrape Fallback 3] Attempting fetch for: {url} using Scraper's Proxy Parser API") | |
api_host = "scrapers-proxy2.p.rapidapi.com" | |
encoded_url = urllib.parse.quote(url, safe='') | |
api_endpoint = f"https://{api_host}/parser?url={encoded_url}&auto_detect=true" | |
headers = { "x-rapidapi-host": api_host, "x-rapidapi-key": api_key, "accept-encoding": "gzip" } | |
try: | |
async with httpx.AsyncClient(timeout=40.0) as client: | |
logger.debug(f"[Web Scrape Fallback 3] Sending GET request to {api_host} for {url}") | |
response = await client.get(api_endpoint, headers=headers) | |
logger.debug(f"[Web Scrape Fallback 3] Received status {response.status_code} from {api_host} for {url}") | |
if response.status_code == 200: | |
try: | |
data = response.json() | |
content = data.get("content"); title = data.get("title"); extracted_text = "" | |
if title and isinstance(title, str): extracted_text += title.strip() + ". " | |
if content and isinstance(content, str): extracted_text += content.strip() | |
extracted_text = extracted_text.strip() # Strip final result | |
if extracted_text and len(extracted_text) > 30: | |
logger.info(f"[Web Scrape Fallback 3] Success via Scraper's Proxy API for {url}. Len: {len(extracted_text)}") | |
return extracted_text | |
else: | |
logger.warning(f"[Web Scrape Fallback 3] Scraper's Proxy API success but content/title too short/empty for {url}. Keys: {list(data.keys())}. Length: {len(extracted_text)}") | |
return None | |
except json.JSONDecodeError: logger.error(f"[Web Scrape Fallback 3] Failed JSON decode Scraper's Proxy API for {url}. Status:{response.status_code}. Resp:{response.text[:500]}"); return None | |
except Exception as e: logger.error(f"[Web Scrape Fallback 3] Error processing Scraper's Proxy API success response for {url}: {e}", exc_info=True); return None | |
elif response.status_code == 401: logger.error(f"[Web Scrape Fallback 3] Auth error (401) with {api_host}. Check RapidAPI key."); return None | |
elif response.status_code == 403: logger.error(f"[Web Scrape Fallback 3] Forbidden (403) from {api_host}. Check subscription/limits."); return None | |
elif response.status_code == 429: logger.warning(f"[Web Scrape Fallback 3] Rate Limit (429) from {api_host}."); return None | |
elif response.status_code >= 500: logger.error(f"[Web Scrape Fallback 3] Server error ({response.status_code}) from {api_host}. Resp:{response.text[:200]}"); return None | |
else: logger.error(f"[Web Scrape Fallback 3] Unexpected status {response.status_code} from {api_host} API for {url}. Resp:{response.text[:200]}"); return None | |
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 3] Timeout connecting to {api_host} API for {url}"); return None | |
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 3] Request error connecting to {api_host} API for {url}: {e}"); return None | |
except Exception as e: logger.error(f"[Web Scrape Fallback 3] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None | |
# --- Fallback 4: AI Web Scraper via RapidAPI --- | |
async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Optional[str]: | |
"""Fallback 4: Fetches website content using AI Web Scraper via RapidAPI.""" | |
if not url: logger.error("[Web Scrape Fallback 4] No URL provided"); return None | |
if not api_key: logger.error("[Web Scrape Fallback 4] RapidAPI key missing."); return None | |
logger.info(f"[Web Scrape Fallback 4] Attempting fetch for: {url} using AI Web Scraper API") | |
api_host = "ai-web-scraper.p.rapidapi.com"; api_endpoint = f"https://{api_host}/extract_content/v1" | |
headers = { 'Content-Type': 'application/x-www-form-urlencoded', 'x-rapidapi-host': api_host, 'x-rapidapi-key': api_key } | |
payload = {'url': url} | |
try: | |
async with httpx.AsyncClient(timeout=45.0) as client: | |
logger.debug(f"[Web Scrape Fallback 4] Sending POST request to {api_host} for {url}") | |
response = await client.post(api_endpoint, headers=headers, data=payload) | |
logger.debug(f"[Web Scrape Fallback 4] Received status {response.status_code} from {api_host} for {url}") | |
if response.status_code == 200: | |
try: | |
data = response.json(); content = None | |
if isinstance(data, dict): content = data.get("content") or data.get("text") or data.get("extracted_text") or data.get("result") | |
elif isinstance(data, str): content = data | |
if content and isinstance(content, str): | |
content_stripped = content.strip() | |
if len(content_stripped) > 30: | |
logger.info(f"[Web Scrape Fallback 4] Success via AI Web Scraper API for {url}. Len: {len(content_stripped)}") | |
return content_stripped | |
else: | |
logger.warning(f"[Web Scrape Fallback 4] AI Web Scraper API success but content too short after stripping for {url}. Len: {len(content_stripped)}") | |
return None | |
else: | |
keys_info = f"Keys: {list(data.keys())}" if isinstance(data, dict) else f"Type: {type(data)}" | |
logger.warning(f"[Web Scrape Fallback 4] AI Web Scraper API success but content empty/invalid format for {url}. {keys_info}") | |
return None | |
except json.JSONDecodeError: | |
raw_text = response.text.strip() | |
if raw_text and len(raw_text) > 30: | |
logger.warning(f"[Web Scrape Fallback 4] Failed JSON decode for AI Web Scraper, but found raw text. Status:{response.status_code}. Using raw text. Len: {len(raw_text)}") | |
return raw_text | |
else: | |
logger.error(f"[Web Scrape Fallback 4] Failed JSON decode AI Web Scraper API for {url}. Status:{response.status_code}. Resp empty/short:{raw_text[:500]}") | |
return None | |
except Exception as e: logger.error(f"[Web Scrape Fallback 4] Error processing AI Web Scraper API success response for {url}: {e}", exc_info=True); return None | |
elif response.status_code == 401: logger.error(f"[Web Scrape Fallback 4] Auth error (401) with {api_host}. Check RapidAPI key."); return None | |
elif response.status_code == 403: logger.error(f"[Web Scrape Fallback 4] Forbidden (403) from {api_host}. Check subscription/limits."); return None | |
elif response.status_code == 429: logger.warning(f"[Web Scrape Fallback 4] Rate Limit (429) from {api_host}."); return None | |
elif response.status_code >= 500: logger.error(f"[Web Scrape Fallback 4] Server error ({response.status_code}) from {api_host}. Resp:{response.text[:200]}"); return None | |
else: logger.error(f"[Web Scrape Fallback 4] Unexpected status {response.status_code} from {api_host} API for {url}. Resp:{response.text[:200]}"); return None | |
except httpx.TimeoutException: logger.error(f"[Web Scrape Fallback 4] Timeout connecting to {api_host} API for {url}"); return None | |
except httpx.RequestError as e: logger.error(f"[Web Scrape Fallback 4] Request error connecting to {api_host} API for {url}: {e}"); return None | |
except Exception as e: logger.error(f"[Web Scrape Fallback 4] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None | |
# --- Fallback 5 & 6: Apify Website Scraping --- | |
async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: str, actor_name: str, fallback_num: int) -> Optional[str]: | |
"""Generic function to run an Apify actor and get text content.""" | |
if not url: logger.error(f"[{actor_name} - FB{fallback_num}] No URL provided"); return None | |
if not api_token: logger.error(f"[{actor_name} - FB{fallback_num}] API token missing."); return None | |
logger.info(f"[{actor_name} - FB{fallback_num}] Attempting fetch for URL: {url} (Actor: {actor_id})") | |
sync_items_endpoint = f"https://api.apify.com/v2/acts/{actor_id}/run-sync-get-dataset-items"; params = {"token": api_token} | |
# Define different inputs based on actor | |
run_input: Dict[str, Any] | |
if actor_id == APIFY_TEXT_SCRAPER_ACTOR_ID: | |
# Input for Text Scraper Free | |
run_input = { "urls": [url] } | |
logger.debug(f"[{actor_name} - FB{fallback_num}] Using simplified input for Text Scraper: {run_input}") | |
elif actor_id == APIFY_CRAWLER_ACTOR_ID: | |
# Input for Website Content Crawler (limit crawl depth) | |
run_input = { | |
"startUrls": [{"url": url}], | |
"maxCrawlPages": 1, # Only crawl the start URL | |
"maxCrawlDepth": 0, # Do not follow links | |
"crawlerType": "playwright:firefox", # Or chromium | |
"maxResults": 1, | |
# You might need to add parameters to extract specific content if default fails | |
# e.g., "pageFunction": "async function pageFunction(context) { return { text: document.body.innerText }; }" | |
} | |
logger.debug(f"[{actor_name} - FB{fallback_num}] Using input for Website Content Crawler: {run_input}") | |
else: | |
logger.error(f"[{actor_name} - FB{fallback_num}] Unknown Apify actor ID: {actor_id}. Cannot determine input format.") | |
return None | |
headers = {"Content-Type": "application/json"} | |
try: | |
async with httpx.AsyncClient(timeout=180.0) as client: # Increased timeout for Apify actors | |
logger.debug(f"[{actor_name} - FB{fallback_num}] POST Request to {sync_items_endpoint} for {url}") | |
response = await client.post(sync_items_endpoint, headers=headers, params=params, json=run_input) | |
logger.debug(f"[{actor_name} - FB{fallback_num}] Received status code {response.status_code} for {url}") | |
if response.status_code == 200: | |
try: | |
results = response.json() | |
if isinstance(results, list) and len(results) > 0: | |
item = results[0]; content = None | |
# Prioritize 'text', then 'content', then 'markdown' | |
if "text" in item and isinstance(item["text"], str): content = item["text"] | |
elif "content" in item and isinstance(item["content"], str): content = item["content"] | |
elif "markdown" in item and isinstance(item["markdown"], str): content = item["markdown"] | |
# Fallback: Parse 'html' if other fields are missing | |
elif "html" in item and isinstance(item["html"], str): | |
logger.warning(f"[{actor_name} - FB{fallback_num}] No 'text', 'content', or 'markdown' found, parsing 'html'.") | |
try: | |
soup = BeautifulSoup(item["html"], DEFAULT_PARSER) | |
content = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip()) | |
except Exception as bs_err: | |
logger.error(f"[{actor_name} - FB{fallback_num}] Error parsing Apify HTML with BS4: {bs_err}") | |
content = None # Ensure content is None if parsing fails | |
if content and isinstance(content, str): | |
content_stripped = content.strip() | |
if len(content_stripped) > 50: # Increased length check | |
logger.info(f"[{actor_name} - FB{fallback_num}] Success via REST for {url}. Length: {len(content_stripped)}") | |
return content_stripped | |
else: | |
logger.warning(f"[{actor_name} - FB{fallback_num}] Dataset item parsed but text content too short after stripping for {url}. Length: {len(content_stripped)}") | |
return None | |
else: | |
logger.warning(f"[{actor_name} - FB{fallback_num}] Dataset item parsed but text content empty or invalid format for {url}. Item keys: {list(item.keys())}") | |
return None | |
else: logger.warning(f"[{actor_name} - FB{fallback_num}] Actor success but dataset was empty for {url}. Response: {results}"); return None | |
except json.JSONDecodeError: logger.error(f"[{actor_name} - FB{fallback_num}] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:200]}"); return None | |
except Exception as e: logger.error(f"[{actor_name} - FB{fallback_num}] Error processing success response for {url}: {e}", exc_info=True); return None | |
elif response.status_code == 400: logger.error(f"[{actor_name} - FB{fallback_num}] Bad Request (400) for {url}. Check run_input. Resp:{response.text[:200]}"); return None | |
elif response.status_code == 401: logger.error(f"[{actor_name} - FB{fallback_num}] Auth error (401). Check token."); return None | |
elif response.status_code == 404: logger.error(f"[{actor_name} - FB{fallback_num}] Endpoint/Actor Not Found (404). Actor: {actor_id} Resp:{response.text[:200]}"); return None | |
else: logger.error(f"[{actor_name} - FB{fallback_num}] Unexpected status {response.status_code} for {url}. Resp:{response.text[:200]}"); return None | |
except httpx.TimeoutException as e: logger.error(f"[{actor_name} - FB{fallback_num}] Timeout during API interaction for {url}: {e}"); return None | |
except httpx.HTTPStatusError as e: logger.error(f"[{actor_name} - FB{fallback_num}] HTTP Status Error during API interaction for {url}: {e}"); return None | |
except httpx.RequestError as e: logger.error(f"[{actor_name} - FB{fallback_num}] Request error during API interaction for {url}: {e}"); return None | |
except Exception as e: logger.error(f"[{actor_name} - FB{fallback_num}] Unexpected error during {actor_name} call for {url}: {e}", exc_info=True); return None | |
async def get_website_content_via_apify_crawler(url: str, api_token: str) -> Optional[str]: | |
"""Fallback 5: Fetches website content using Apify Website Content Crawler.""" | |
return await _run_apify_actor_for_web_content( | |
url=url, api_token=api_token, actor_id=APIFY_CRAWLER_ACTOR_ID, | |
actor_name="Apify Crawler", fallback_num=5 | |
) | |
async def get_website_content_via_apify_text_scraper(url: str, api_token: str) -> Optional[str]: | |
"""Fallback 6: Fetches website content using Apify Text Scraper Free.""" | |
return await _run_apify_actor_for_web_content( | |
url=url, api_token=api_token, actor_id=APIFY_TEXT_SCRAPER_ACTOR_ID, | |
actor_name="Apify Text Scraper", fallback_num=6 | |
) | |
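# Every scraper above returns None on failure so the caller can simply move on to the next method in the chain.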
# --- Summarization Functions (Unchanged) --- | |
async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]: | |
"""Internal function to call Gemini API. Returns (summary, error_message).""" | |
global GEMINI_MODEL, _gemini_primary_enabled | |
if not _gemini_primary_enabled: | |
logger.error("[Gemini Primary] Called but is disabled."); | |
return None, "Error: Primary AI service (Gemini) not configured/available." | |
logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}") | |
if summary_type == "paragraph": | |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" | |
"• Clear and simple language suitable for someone unfamiliar with the topic.\n" | |
"• Uses British English spellings throughout.\n" | |
"• Straightforward and understandable vocabulary; avoid complex terms.\n" | |
"• Presented as ONE SINGLE PARAGRAPH.\n" | |
"• No more than 85 words maximum; but does not have to be exactly 85.\n" | |
"• Considers the entire text content equally.\n" | |
"• Uses semicolons (;) instead of em dashes (– or —).\n" | |
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n" | |
"Here is the text to summarise:") | |
else: # points summary | |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this format:\n\n" | |
"• For each distinct topic or section identified in the text, create a heading.\n" | |
"• Each heading MUST be plain text without any formatting (e.g., Section Title).\n" | |
"• Immediately following each heading, list the key points as a bulleted list.\n" | |
"• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" | |
"• The text within each bullet point should NOT contain any bold formatting.\n" | |
"• IMPORTANT: Never use bold formatting (double asterisks) within the text of the bullet points themselves.\n" | |
"• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" | |
"• Use British English spellings throughout.\n" | |
"• Avoid overly complex or advanced vocabulary.\n" | |
"• Keep bullet points concise.\n" | |
"• Ensure the entire summary takes no more than two minutes to read.\n" | |
"• Consider the entire text's content, not just the beginning or a few topics.\n" | |
"• Use semicolons (;) instead of em dashes (– or —).\n" | |
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n" | |
"Here is the text to summarise:") | |
# Gemini 1.5 Flash context window is large, but let's keep a reasonable practical limit | |
MAX_INPUT_LENGTH_GEMINI = 900000 # ~900k characters; stays well within Gemini 1.5 Flash's 1M-token context window
if len(text) > MAX_INPUT_LENGTH_GEMINI: | |
logger.warning(f"[Gemini Primary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH_GEMINI}). Truncating."); | |
text = text[:MAX_INPUT_LENGTH_GEMINI] + "... (Content truncated)" | |
full_prompt = f"{prompt}\n\n{text}" | |
safety_settings = { HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, } | |
# Check if HARM_CATEGORY_CIVIC_INTEGRITY exists before adding (might vary by SDK version/region) | |
# if hasattr(HarmCategory, 'HARM_CATEGORY_CIVIC_INTEGRITY'): | |
# safety_settings[HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY] = HarmBlockThreshold.BLOCK_NONE | |
logger.debug(f"[Gemini Primary] Using safety settings: { {k.name: v.name for k, v in safety_settings.items()} }") | |
try: | |
logger.debug(f"[Gemini Primary] Initializing model {GEMINI_MODEL}") | |
model = genai.GenerativeModel(GEMINI_MODEL) | |
logger.info(f"[Gemini Primary] Sending request to Gemini ({GEMINI_MODEL})...") | |
request_options = {"timeout": 120} # 120 seconds timeout | |
response = await model.generate_content_async( | |
full_prompt, | |
generation_config=genai.types.GenerationConfig(), # Use default generation config | |
safety_settings=safety_settings, | |
request_options=request_options | |
) | |
logger.info("[Gemini Primary] Received response from Gemini.") | |
# Check for blocking based on prompt feedback first | |
if response.prompt_feedback and response.prompt_feedback.block_reason: | |
block_reason_str = getattr(response.prompt_feedback.block_reason, 'name', str(response.prompt_feedback.block_reason)) | |
logger.warning(f"[Gemini Primary] Request blocked by API based on prompt feedback. Reason: {block_reason_str}"); | |
return None, f"Sorry, the primary AI model ({GEMINI_MODEL}) blocked the request (Reason: {block_reason_str})." | |
# If not blocked by prompt, check candidate content and finish reason | |
summary = None | |
finish_reason_str = 'UNKNOWN' | |
safety_block_reason = None | |
if response.candidates: | |
candidate = response.candidates[0] | |
finish_reason_enum = getattr(candidate, 'finish_reason', None) | |
finish_reason_str = getattr(finish_reason_enum, 'name', 'N/A') if finish_reason_enum else 'N/A' | |
if finish_reason_str == 'SAFETY': | |
safety_ratings_str = "N/A" | |
if hasattr(candidate, 'safety_ratings'): | |
safety_ratings_str = ", ".join([f"{rating.category.name}: {rating.probability.name}" for rating in candidate.safety_ratings]) | |
safety_block_reason = f"SAFETY (Ratings: [{safety_ratings_str}])" | |
logger.warning(f"[Gemini Primary] Candidate blocked due to SAFETY. Finish Reason: {finish_reason_str}. {safety_block_reason}") | |
# Don't return yet, check if response.text fallback works | |
elif finish_reason_str not in ['STOP', 'MAX_TOKENS', 'N/A', None]: # Log unusual reasons | |
logger.warning(f"[Gemini Primary] Candidate finished with non-standard reason: {finish_reason_str}") | |
# Try extracting text from the candidate parts | |
if candidate.content and candidate.content.parts: | |
summary = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text')) | |
# Fallback to response.text if candidate parsing failed or was blocked (but prompt wasn't) | |
if summary is None: | |
try: | |
# This might raise ValueError if the response was fully blocked (e.g., safety) | |
summary = response.text | |
if safety_block_reason: # If we got here despite a safety block, log it | |
logger.warning(f"[Gemini Primary] Got text via response.text despite SAFETY block reason: {safety_block_reason}") | |
except ValueError as e: | |
logger.warning(f"[Gemini Primary] Error accessing response.text (likely blocked response): {e}. Final Finish Reason: {finish_reason_str}") | |
summary = None # Ensure summary is None if .text fails | |
# Final check and return | |
if summary: | |
logger.info(f"[Gemini Primary] Success generating summary. Finish Reason: {finish_reason_str}. Output len: {len(summary)}"); | |
return summary.strip(), None | |
else: | |
# Provide a more specific error if safety was the likely cause | |
error_msg = f"Sorry, the primary AI model ({GEMINI_MODEL}) did not provide a summary (Finish Reason: {finish_reason_str})." | |
if safety_block_reason: | |
error_msg = f"Sorry, the primary AI model ({GEMINI_MODEL}) blocked the response due to safety filters ({finish_reason_str})." | |
logger.warning(f"[Gemini Primary] Gemini returned empty summary or content was blocked. Final Finish Reason: {finish_reason_str}. Safety Block Reason: {safety_block_reason}"); | |
return None, error_msg | |
except AttributeError as ae: | |
# This might happen if the SDK response structure changes | |
logger.error(f"[Gemini Primary] AttributeError during Gemini response processing: {ae}. SDK might be incompatible or response structure unexpected.", exc_info=True) | |
return None, f"Sorry, error processing response from the primary AI ({GEMINI_MODEL})." | |
except Exception as e: | |
# Catch potential network errors, timeouts, etc. | |
logger.error(f"[Gemini Primary] Unexpected error during Gemini API call: {e}", exc_info=True) | |
return None, f"Sorry, unexpected error using primary AI ({GEMINI_MODEL})." | |
async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]: | |
"""Internal function to call OpenRouter API (Fallback). Returns (summary, error_message).""" | |
global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled | |
if not _openrouter_fallback_enabled: | |
logger.error("[OpenRouter Fallback] Called but is disabled."); | |
return None, "Error: Fallback AI service (OpenRouter) not configured/available." | |
logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}") | |
if summary_type == "paragraph": | |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" | |
"• Clear and simple language suitable for someone unfamiliar with the topic.\n" | |
"• Uses British English spellings throughout.\n" | |
"• Straightforward and understandable vocabulary; avoid complex terms.\n" | |
"• Presented as ONE SINGLE PARAGRAPH.\n" | |
"• No more than 85 words maximum; but does not have to be exactly 85.\n" | |
"• Considers the entire text content equally.\n" | |
"• Uses semicolons (;) instead of em dashes (– or —).\n" | |
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n" | |
"Here is the text to summarise:") | |
else: # points summary | |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this format:\n\n" | |
"• For each distinct topic or section identified in the text, create a heading.\n" | |
"• Each heading MUST be plain text without any formatting (e.g., Section Title).\n" | |
"• Immediately following each heading, list the key points as a bulleted list.\n" | |
"• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" | |
"• The text within each bullet point should NOT contain any bold formatting.\n" | |
"• IMPORTANT: Never use bold formatting (double asterisks) within the text of the bullet points themselves.\n" | |
"• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" | |
"• Use British English spellings throughout.\n" | |
"• Avoid overly complex or advanced vocabulary.\n" | |
"• Keep bullet points concise.\n" | |
"• Ensure the entire summary takes no more than two minutes to read.\n" | |
"• Consider the entire text's content, not just the beginning or a few topics.\n" | |
"• Use semicolons (;) instead of em dashes (– or —).\n" | |
"• **Focus ONLY on the main content; strictly EXCLUDE information about website features, subscriptions, ads, cookie notices, or navigation elements. Do not include things like free/paid tiers; basic/premium memberships. Especially for ACS membership.**\n\n" | |
"Here is the text to summarise:") | |
    # Check the model's context window if known; otherwise use a conservative character limit.
    # Deepseek Coder 33B has a 16k-token context, so stay below it to leave room for the prompt and output.
    MAX_INPUT_LENGTH_OR = 60000  # roughly 15k tokens at ~4 characters per token
if len(text) > MAX_INPUT_LENGTH_OR: | |
logger.warning(f"[OpenRouter Fallback] Input length ({len(text)}) exceeds estimated limit ({MAX_INPUT_LENGTH_OR}) for {OPENROUTER_MODEL}. Truncating."); | |
text = text[:MAX_INPUT_LENGTH_OR] + "... (Content truncated)" | |
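    # e.g. a 100,000-character article is cut to its first 60,000 characters plus the
    # "... (Content truncated)" marker before being appended to the prompt below.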
full_prompt = f"{prompt}\n\n{text}" | |
headers = { | |
"Authorization": f"Bearer {OPENROUTER_API_KEY}", | |
"Content-Type": "application/json", | |
# Optional, but good practice for OpenRouter identification | |
"HTTP-Referer": "https://github.com/your-repo-or-app-name", # Replace with your repo/app URL | |
"X-Title": "TelegramSummariserBot" # Replace with your app name | |
} | |
payload = { | |
"model": OPENROUTER_MODEL, | |
"messages": [{"role": "user", "content": full_prompt}] | |
# Add optional parameters like temperature, max_tokens if needed | |
# "temperature": 0.7, | |
# "max_tokens": 1024, | |
} | |
openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions" | |
# Increased read timeout as some models can take time | |
api_timeouts = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=60.0); | |
response = None | |
try: | |
async with httpx.AsyncClient(timeout=api_timeouts) as client: | |
logger.info(f"[OpenRouter Fallback] Sending request to OpenRouter ({OPENROUTER_MODEL})...") | |
response = await client.post(openrouter_api_endpoint, headers=headers, json=payload) | |
logger.info(f"[OpenRouter Fallback] Received response. Status: {response.status_code}") | |
if response.status_code == 200: | |
try: | |
data = response.json() | |
if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0: | |
choice = data["choices"][0] | |
message = choice.get("message") | |
finish_reason = choice.get("finish_reason", "N/A") | |
if message and isinstance(message, dict): | |
summary = message.get("content") | |
if summary: | |
logger.info(f"[OpenRouter Fallback] Success. Finish: {finish_reason}. Output len: {len(summary)}") | |
return summary.strip(), None | |
else: | |
# Model might return empty content successfully | |
logger.warning(f"[OpenRouter Fallback] Success but content empty. Finish: {finish_reason}. Resp: {data}") | |
return None, f"Fallback AI ({OPENROUTER_MODEL}) returned empty summary (Finish: {finish_reason})." | |
else: | |
logger.error(f"[OpenRouter Fallback] Unexpected message structure: {message}. Finish: {finish_reason}. Full: {data}") | |
return None, "Could not parse fallback AI response (message format)." | |
else: | |
# Check for specific OpenRouter errors in the response body | |
error_details = data.get("error", {}) | |
error_msg = error_details.get("message", "Unknown error in response structure") | |
logger.error(f"[OpenRouter Fallback] Unexpected choices structure or error in response. Error: {error_msg}. Full: {data}") | |
return None, f"Fallback AI response error: {error_msg}." | |
except json.JSONDecodeError: | |
logger.error(f"[OpenRouter Fallback] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:500]}") | |
return None, "Failed to understand fallback AI response." | |
except Exception as e: | |
logger.error(f"[OpenRouter Fallback] Error processing success response: {e}", exc_info=True) | |
return None, "Error processing fallback AI response." | |
# Handle specific HTTP error codes | |
elif response.status_code == 401: | |
logger.error("[OpenRouter Fallback] API key invalid or missing (401).") | |
return None, "Fallback AI authentication failed (check key)." | |
elif response.status_code == 402: | |
logger.error("[OpenRouter Fallback] Payment Required/Quota Exceeded (402).") | |
return None, f"Fallback AI ({OPENROUTER_MODEL}) quota/limit reached." | |
elif response.status_code == 429: | |
logger.warning(f"[OpenRouter Fallback] Rate Limit Exceeded (429) for {OPENROUTER_MODEL}.") | |
return None, f"Fallback AI ({OPENROUTER_MODEL}) is rate-limited. Try again later." | |
elif response.status_code == 500: | |
logger.error(f"[OpenRouter Fallback] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}") | |
return None, f"Fallback AI service ({OPENROUTER_MODEL}) encountered an internal error." | |
else: | |
# General unexpected status code | |
error_info = "" | |
try: # Try to get error message from JSON response | |
error_info = response.json().get("error", {}).get("message", "") | |
except Exception: pass | |
logger.error(f"[OpenRouter Fallback] Unexpected status {response.status_code}. Error: '{error_info}' Resp:{response.text[:500]}"); | |
return None, f"Fallback AI ({OPENROUTER_MODEL}) returned error status {response.status_code}." | |
except httpx.TimeoutException as e: | |
logger.error(f"[OpenRouter Fallback] Timeout error ({type(e)}) connecting to or reading from OpenRouter API: {e}") | |
return None, f"Fallback AI ({OPENROUTER_MODEL}) timed out." | |
except httpx.RequestError as e: | |
logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}") | |
return None, "Error connecting to fallback AI service." | |
except Exception as e: | |
logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter call: {e}", exc_info=True) | |
return None, "Unexpected error using fallback AI service." | |
async def generate_summary(text: str, summary_type: str) -> str: | |
"""Generates summary using Gemini (Primary) and falls back to OpenRouter if needed.""" | |
global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL | |
logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})") | |
final_summary: Optional[str] = None; primary_error_message: Optional[str] = None | |
if _gemini_primary_enabled: | |
logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})") | |
final_summary, primary_error_message = await _call_gemini(text, summary_type) | |
if final_summary: logger.info("[Summary Generation] Success with primary AI (Gemini)."); return final_summary | |
else: logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {primary_error_message}. Proceeding to fallback.") | |
else: logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback."); primary_error_message = "Primary AI (Gemini) unavailable." | |
if _openrouter_fallback_enabled: | |
logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})") | |
fallback_summary, fallback_error_message = await _call_openrouter(text, summary_type) | |
if fallback_summary: logger.info("[Summary Generation] Success with fallback AI (OpenRouter)."); return fallback_summary | |
else: | |
logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error_message}") | |
# Construct a combined error message | |
primary_err = primary_error_message or "Primary AI unavailable" | |
fallback_err = fallback_error_message or "Fallback AI failed with unknown error" | |
return f"Sorry, summarization failed.\nPrimary: {primary_err}\nFallback ({OPENROUTER_MODEL}): {fallback_err}" | |
else: | |
logger.error("[Summary Generation] Fallback AI (OpenRouter) disabled. Cannot proceed.") | |
if primary_error_message: return f"{primary_error_message} Fallback AI is also unavailable." | |
else: return "Error: Both primary and fallback AI services are unavailable." | |
# This line should technically not be reached if logic is sound | |
logger.error("[Summary Generation] Reached end of function unexpectedly.") | |
return "Sorry, unknown error during summary generation." | |
# --- Main Processing Logic (MODIFIED with Crawl4AI and re-ordered fallbacks) --- | |
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None: | |
"""Handles the entire process: fetching content (Crawl4AI -> Fallbacks) and summarizing.""" | |
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}") | |
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None | |
try: | |
# Use longer timeouts for the background bot to handle potentially long scrapes/summaries | |
background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=240.0, write_timeout=60.0, pool_timeout=240.0 ) | |
bot = Bot(token=bot_token, request=background_request) | |
except Exception as e: | |
logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True) | |
# We cannot proceed without a bot instance | |
return | |
content: Optional[str] = None | |
user_feedback_message: Optional[str] = None | |
success: bool = False | |
# Use the original button message ID if available, otherwise we'll send a new one | |
status_message_id: Optional[int] = message_id_to_edit | |
# Keep track if we sent a *new* message that needs deleting (vs editing the button message) | |
    new_status_message_id: Optional[int] = None
try: | |
# --- 1. Initial User Feedback --- | |
processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nFetching content (using primary method... this might take a minute)..." | |
if status_message_id: | |
try: | |
# Edit the message containing the buttons | |
await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ) | |
logger.debug(f"[Task {task_id}] Edited button message {status_message_id} to 'Processing'") | |
except (BadRequest, TelegramError) as e: | |
# Common errors: message not modified, message to edit not found, query too old | |
logger.warning(f"[Task {task_id}] Could not edit original button message {status_message_id}: {e}. Will send a new status message.") | |
status_message_id = None # Ensure we send a new message if edit fails | |
except Exception as e: | |
logger.error(f"[Task {task_id}] Unexpected error editing button message {status_message_id}: {e}. Will send new.", exc_info=True) | |
status_message_id = None | |
# If we couldn't edit the original message, send a new one | |
if not status_message_id: | |
try: | |
status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN ) | |
if status_message: | |
new_status_message_id = status_message.message_id | |
logger.debug(f"[Task {task_id}] Sent new status message {new_status_message_id}") | |
else: | |
# This should ideally not happen due to retry_bot_operation, but handle defensively | |
raise RuntimeError("Failed to send new status message after retries.") | |
except Exception as e: | |
# If we can't even send a status message, we can't proceed meaningfully | |
logger.error(f"[Task {task_id}] CRITICAL: Failed to send initial status message: {e}. Aborting task.", exc_info=True) | |
# Attempt to clean up the original button message if it exists | |
if message_id_to_edit: | |
try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=message_id_to_edit) | |
except Exception: pass | |
raise # Re-raise to be caught by outer try/finally | |
# Determine which message ID to update/delete later | |
message_to_update_id = new_status_message_id or status_message_id | |
try: | |
# --- 2. Content Fetching (Chain of methods) --- | |
# Send typing indicator | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
is_youtube = is_youtube_url(url); logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}") | |
if is_youtube: | |
# --- YouTube Transcript Logic (Unchanged from original) --- | |
video_id = extract_youtube_id(url) | |
if video_id: | |
content = await get_youtube_transcript(video_id, url) | |
else: | |
user_feedback_message = "Sorry, I couldn't understand that YouTube URL format." | |
# Set feedback message if transcript fetch failed | |
if not content and not user_feedback_message: | |
user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)." | |
else: | |
# --- Website Scraping Logic (NEW Order: Crawl4AI -> Fallbacks) --- | |
global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN | |
global _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists, _crawl4ai_primary_scrape_enabled | |
# Method 0: Primary Scrape (Crawl4AI) | |
logger.info(f"[Task {task_id}] Trying Web Scrape Method 0 (Primary: Crawl4AI)...") | |
if _crawl4ai_primary_scrape_enabled: | |
content = await get_website_content_via_crawl4ai(url) | |
if content: | |
logger.info(f"[Task {task_id}] Method 0 (Crawl4AI) succeeded.") | |
else: | |
logger.warning(f"[Task {task_id}] Method 0 (Crawl4AI) failed or returned insufficient content.") | |
# Edit status message to indicate fallback attempt | |
if message_to_update_id: | |
try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=message_to_update_id, text="Primary scrape method failed, trying fallbacks...", parse_mode=ParseMode.MARKDOWN) | |
except Exception: pass # Ignore if edit fails | |
else: | |
logger.warning(f"[Task {task_id}] Method 0 (Crawl4AI) skipped - library/driver unavailable.") | |
# Edit status message | |
if message_to_update_id: | |
try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=message_to_update_id, text="Primary scrape method unavailable, trying fallbacks...", parse_mode=ParseMode.MARKDOWN) | |
except Exception: pass | |
# Method 1: Fallback 1 (Direct Fetch + BS4) | |
if not content: | |
logger.warning(f"[Task {task_id}] Method 0 failed/skipped. Trying Method 1 (Direct Fetch + BS4)...") | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
content = await get_website_content_direct_bs4(url) | |
if not content: logger.warning(f"[Task {task_id}] Method 1 (Direct Fetch + BS4) failed.") | |
# Method 2: Fallback 2 (urltotext.com) | |
if not content: | |
logger.warning(f"[Task {task_id}] Method 1 failed. Trying Method 2 (urltotext.com)...") | |
if _urltotext_key_exists: | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
content = await get_website_content_via_api(url, URLTOTEXT_API_KEY) | |
if not content: logger.warning(f"[Task {task_id}] Method 2 (urltotext.com) failed.") | |
else: logger.warning(f"[Task {task_id}] Method 2 (urltotext.com) API key unavailable. Skipping.") | |
# Method 3: Fallback 3 (Scraper's Proxy via RapidAPI) | |
if not content: | |
logger.warning(f"[Task {task_id}] Method 2 failed. Trying Method 3 (Scraper's Proxy)...") | |
if _rapidapi_key_exists: | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
content = await get_website_content_via_scrapers_proxy(url, RAPIDAPI_KEY) | |
if not content: logger.warning(f"[Task {task_id}] Method 3 (Scraper's Proxy) failed.") | |
else: logger.warning(f"[Task {task_id}] Method 3 (Scraper's Proxy) RapidAPI key unavailable. Skipping.") | |
# Method 4: Fallback 4 (AI Web Scraper via RapidAPI) | |
if not content: | |
logger.warning(f"[Task {task_id}] Method 3 failed. Trying Method 4 (AI Web Scraper)...") | |
if _rapidapi_key_exists: | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
content = await get_website_content_via_ai_web_scraper(url, RAPIDAPI_KEY) | |
if not content: logger.warning(f"[Task {task_id}] Method 4 (AI Web Scraper) failed.") | |
else: logger.warning(f"[Task {task_id}] Method 4 (AI Web Scraper) RapidAPI key unavailable. Skipping.") | |
# Method 5: Fallback 5 (Apify Website Content Crawler) | |
if not content: | |
logger.warning(f"[Task {task_id}] Method 4 failed. Trying Method 5 (Apify Crawler)...") | |
if _apify_token_exists: | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
content = await get_website_content_via_apify_crawler(url, APIFY_API_TOKEN) | |
if not content: logger.warning(f"[Task {task_id}] Method 5 (Apify Crawler) failed.") | |
else: logger.warning(f"[Task {task_id}] Method 5 (Apify Crawler) APIFY_API_TOKEN unavailable. Skipping.") | |
# Method 6: Fallback 6 (Apify Text Scraper Free) | |
if not content: | |
logger.warning(f"[Task {task_id}] Method 5 failed. Trying Method 6 (Apify Text Scraper)...") | |
if _apify_token_exists: | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
content = await get_website_content_via_apify_text_scraper(url, APIFY_API_TOKEN) | |
if not content: logger.warning(f"[Task {task_id}] Method 6 (Apify Text Scraper) failed.") | |
else: logger.warning(f"[Task {task_id}] Method 6 (Apify Text Scraper) APIFY_API_TOKEN unavailable. Skipping.") | |
# Final check if all website methods failed | |
if not content and not user_feedback_message: | |
logger.error(f"[Task {task_id}] All web scraping methods failed for {url}.") | |
user_feedback_message = "Sorry, I couldn't fetch readable content from that website using multiple methods (blocked/dynamic content/empty?). Even the advanced crawler failed." | |
# --- 3. Summarization --- | |
if content: | |
logger.info(f"[Task {task_id}] Content fetched successfully (len:{len(content)}). Generating '{summary_type}' summary.") | |
# Update status message before starting potentially long summary generation | |
if message_to_update_id: | |
try: | |
await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=message_to_update_id, text=f"Content fetched! Now generating '{summary_type}' summary with AI...", parse_mode=ParseMode.MARKDOWN, reply_markup=None ) | |
except Exception as edit_e: | |
logger.warning(f"[Task {task_id}] Failed to edit status message before summary generation: {edit_e}") | |
# Send typing indicator again for summary generation | |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing') | |
final_summary = await generate_summary(content, summary_type) | |
# Check if summary generation itself returned an error message | |
if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): | |
user_feedback_message = final_summary # Use the error message from generate_summary | |
logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}") | |
else: | |
# Summary successful, send it (potentially in parts) | |
max_length = 4096 # Telegram message length limit | |
if len(final_summary) <= max_length: | |
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=final_summary, parse_mode=None, link_preview_options={'is_disabled': True} ) | |
else: | |
# Split into parts | |
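                        # Naive line-based split: assumes no single line itself exceeds max_length;
                        # a longer line would still be emitted as an over-length part and rejected by Telegram.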
summary_parts = [] | |
current_part = "" | |
for line in final_summary.splitlines(keepends=True): | |
if len(current_part) + len(line) > max_length: | |
summary_parts.append(current_part) | |
current_part = line | |
else: | |
current_part += line | |
if current_part: # Add the last part | |
summary_parts.append(current_part) | |
logger.info(f"[Task {task_id}] Summary too long ({len(final_summary)} chars), splitting into {len(summary_parts)} parts.") | |
for i, part in enumerate(summary_parts): | |
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} ) | |
if i < len(summary_parts) - 1: | |
await asyncio.sleep(0.7) # Short delay between parts | |
success = True | |
logger.info(f"[Task {task_id}] Successfully sent summary.") | |
user_feedback_message = None # Clear any previous potential error message | |
# --- 4. Handle Final Failure Feedback --- | |
# If we have a user_feedback_message set at this point, it means something failed | |
# (either content fetching or summarization) | |
if user_feedback_message: | |
logger.warning(f"[Task {task_id}] Process failed. Sending failure feedback: {user_feedback_message}") | |
# Send the failure message as a new message | |
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} ) | |
except Exception as e: | |
# Catch any unexpected errors during the main processing block | |
logger.error(f"[Task {task_id}] Unexpected error during core processing: {e}", exc_info=True) | |
user_feedback_message = "Oops! Something went wrong while processing your request. Please try again later." | |
try: | |
# Try to send a generic error message | |
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message ) | |
except Exception as feedback_err: | |
logger.error(f"[Task {task_id}] Failed even to send the generic error feedback message: {feedback_err}") | |
success = False # Ensure success is false | |
except Exception as outer_e: | |
# Catch critical errors (like failure to send initial status message) | |
logger.critical(f"[Task {task_id}] Critical outer error prevented task execution: {outer_e}", exc_info=True) | |
try: | |
if bot: # Check if bot was initialized | |
await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ A critical internal error occurred. I couldn't process your request." ) | |
except Exception as crit_feedback_err: | |
logger.exception(f"[Task {task_id}] Failed even to send the critical error message: {crit_feedback_err}") | |
success = False # Ensure success is false | |
finally: | |
# --- 5. Cleanup --- | |
# Delete the status message we were updating (either the original button message or the new one we sent) | |
delete_target_id = new_status_message_id if new_status_message_id else status_message_id | |
if delete_target_id and bot: | |
try: | |
await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id) | |
logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}") | |
except (BadRequest, TelegramError) as del_e: | |
# Ignore errors like "message to delete not found" | |
if "not found" not in str(del_e).lower(): | |
logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}") | |
except Exception as del_e: | |
logger.warning(f"[Task {task_id}] Unexpected error deleting status/button message {delete_target_id}: {del_e}") | |
        # Close the background bot's underlying HTTPX client if it was created. This reaches into the
        # private _client attribute; newer PTB versions expose request.shutdown() as a public alternative.
if background_request and hasattr(background_request, '_client') and background_request._client: | |
try: | |
await background_request._client.aclose() | |
logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.") | |
except Exception as close_e: | |
logger.warning(f"[Task {task_id}] Error closing background bot's client: {close_e}") | |
logger.info(f"[Task {task_id}] Task finished. Overall Success: {success}") | |
# --- Telegram Handlers --- | |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: | |
    user = update.effective_user
    if not user or not update.message: return
    mention = user.mention_html()
logger.info(f"User {user.id} ({user.username or 'no_username'}) used /start.") | |
await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" ) | |
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: | |
user = update.effective_user | |
if not user or not update.message: return | |
logger.info(f"User {user.id} ({user.username or 'no_username'}) used /help.") | |
help_text = ( "🔍 **How to use this bot:**\n\n" | |
"1. Send me any YouTube video link or website URL.\n" | |
"2. I'll ask how you want it summarised (paragraph or points).\n" | |
"3. Click the button for your choice.\n" | |
"4. Wait while I fetch the content and generate the summary!\n\n" | |
"⚙️ **Website Scraping:** I use an advanced web crawler (`crawl4ai`) first. If that doesn't work, I'll try several fallback methods (direct fetch, APIs) to get the text.\n" | |
"📺 **YouTube:** I try the official library first, then fall back to APIs if needed.\n" | |
"🤖 **Summaries:** I use Google Gemini primarily, with OpenRouter as a backup.\n\n" | |
"**Commands:**\n" | |
"`/start` - Display the welcome message\n" | |
"`/help` - Show this help message" ) | |
await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN) | |
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: | |
if not update.message or not update.message.text: return | |
message_text = update.message.text.strip(); user = update.effective_user | |
if not user: return | |
# More robust URL extraction using regex - finds the first http(s) link | |
url_pattern = re.compile(r"https?://[^\s/$.?#].[^\s]*", re.IGNORECASE) | |
match = url_pattern.search(message_text) | |
if match: | |
extracted_url = match.group(0) | |
# Clean potential trailing characters like periods or parentheses if message contained more text | |
extracted_url = extracted_url.rstrip(').,') | |
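        # e.g. "Interesting read: https://example.com/article)." yields "https://example.com/article"
        # after the regex match and the trailing-punctuation strip above (example.com is illustrative).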
logger.info(f"User {user.id} ({user.username or 'no_username'}) sent potential URL: {extracted_url}") | |
# Store URL and original message ID in user_data for the callback | |
context.user_data['url_to_summarize'] = extracted_url | |
context.user_data['original_message_id'] = update.message.message_id # Store original message ID if needed later | |
keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]] | |
reply_markup = InlineKeyboardMarkup(keyboard) | |
try: | |
# Reply to the original message | |
await update.message.reply_text( | |
f"Okay, I see this link:\n`{extracted_url}`\n\nHow would you like it summarised?", | |
reply_markup=reply_markup, | |
disable_web_page_preview=True, | |
parse_mode=ParseMode.MARKDOWN | |
) | |
except BadRequest as e: | |
if "chat not found" in str(e).lower() or "bot was blocked by the user" in str(e).lower(): | |
logger.warning(f"Could not reply to user {user.id} (chat not found or blocked).") | |
else: | |
logger.error(f"BadRequest replying to URL message from {user.id}: {e}") | |
except Exception as e: | |
logger.error(f"Error replying to URL message from {user.id}: {e}", exc_info=True) | |
else: | |
# If the message filter passed but regex didn't find a URL, log it but don't reply | |
logger.debug(f"Ignoring message from {user.id} - Entity filter matched but no URL found by regex: {message_text[:100]}") | |
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: | |
query = update.callback_query | |
if not query or not query.message or not query.from_user: | |
logger.warning("Callback query received without essential data.") | |
# Attempt to answer the query even if we can't process it, to remove the loading indicator | |
if query: | |
try: await query.answer("Error: Missing data.", show_alert=True) | |
except Exception: pass | |
return | |
user = query.from_user | |
summary_type = query.data | |
query_id = query.id | |
chat_id = query.message.chat_id | |
message_id_to_edit = query.message.message_id # This is the message with the buttons | |
try: | |
# Acknowledge the button press quickly | |
await query.answer() | |
logger.debug(f"Acknowledged callback {query_id} from {user.id} for summary type '{summary_type}'") | |
except BadRequest as e: | |
if "query is too old" in str(e).lower(): | |
logger.warning(f"Callback query {query_id} is too old to answer. User might have double-clicked or waited too long.") | |
# Optionally edit the message to indicate the issue if possible | |
try: await query.edit_message_text(text="This request is too old. Please send the link again.", reply_markup=None) | |
except Exception: pass | |
return # Stop processing if the query is too old | |
else: | |
# Log other BadRequest errors but attempt to continue if acknowledging failed | |
logger.error(f"Error answering callback {query_id}: {e}", exc_info=True) | |
except Exception as e: | |
logger.error(f"Unexpected error answering callback {query_id}: {e}", exc_info=True) | |
# Attempt to continue processing even if answering failed | |
# Retrieve the URL stored in user_data | |
url = context.user_data.get('url_to_summarize') | |
logger.info(f"User {user.id} chose '{summary_type}' for button message {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}") | |
if not url: | |
logger.warning(f"No URL found in context for user {user.id} (callback query {query_id}). Button might be old or context lost.") | |
try: | |
# Edit the button message to inform the user | |
await query.edit_message_text( | |
text="Sorry, I couldn't find the original URL for this request (it might be too old or the bot restarted). Please send the link again.", | |
reply_markup=None # Remove buttons | |
) | |
except (BadRequest, TelegramError) as edit_e: | |
# Ignore errors like "message is not modified" or "message to edit not found" | |
if "not modified" not in str(edit_e).lower() and "not found" not in str(edit_e).lower(): | |
logger.warning(f"Failed to edit 'URL not found' message {message_id_to_edit} for user {user.id}: {edit_e}") | |
except Exception as edit_e: | |
logger.warning(f"Error editing 'URL not found' message {message_id_to_edit} for user {user.id}: {edit_e}") | |
return # Stop processing if URL is missing | |
# Clear the URL from context once retrieved to prevent accidental reuse | |
context.user_data.pop('url_to_summarize', None) | |
context.user_data.pop('original_message_id', None) # Clear original message ID too | |
logger.debug(f"Cleared URL context for user {user.id}") | |
# --- Pre-task Checks --- | |
global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled | |
if not TELEGRAM_TOKEN: | |
logger.critical("FATAL: TELEGRAM_TOKEN missing when trying to start background task!") | |
try: await query.edit_message_text(text="❌ Critical Bot Configuration Error (Missing Token). Cannot proceed.", reply_markup=None) | |
except Exception: pass | |
return | |
if not _gemini_primary_enabled and not _openrouter_fallback_enabled: | |
logger.critical("FATAL: Neither Gemini nor OpenRouter API keys are configured/valid when trying to start background task!") | |
try: await query.edit_message_text(text="❌ Critical AI Configuration Error: No summarization models available. Cannot proceed.", reply_markup=None) | |
except Exception: pass | |
return | |
elif not _gemini_primary_enabled: | |
logger.warning("Primary AI (Gemini) unavailable, relying solely on fallback for this task.") | |
elif not _openrouter_fallback_enabled: | |
logger.warning("Fallback AI (OpenRouter) unavailable, relying solely on primary for this task.") | |
# --- Schedule Background Task --- | |
logger.info(f"Scheduling background task for user {user.id}, chat {chat_id}, button message {message_id_to_edit}, url: {url[:60]}...") | |
asyncio.create_task( | |
process_summary_task( | |
user_id=user.id, | |
chat_id=chat_id, | |
message_id_to_edit=message_id_to_edit, # Pass the button message ID | |
url=url, | |
summary_type=summary_type, | |
bot_token=TELEGRAM_TOKEN | |
), | |
# Name the task for easier debugging if needed | |
name=f"SummaryTask-{user.id}-{message_id_to_edit}" | |
) | |
# Note: The process_summary_task will handle editing/deleting the message_id_to_edit | |
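    # Caveat: asyncio keeps only a weak reference to tasks created this way; if garbage collection of
    # fire-and-forget tasks ever becomes an issue, keep the returned task object in a module-level set.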
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None: | |
"""Log Errors caused by Updates.""" | |
logger.error("Exception while handling an update:", exc_info=context.error) | |
# Optionally add more context if 'update' is an Update object | |
if isinstance(update, Update) and update.effective_chat: | |
logger.error(f"Error occurred in chat {update.effective_chat.id}") | |
# --- Application Setup & Web Framework --- | |
async def setup_bot_config() -> Application: | |
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN | |
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.") | |
# Configure HTTPX request settings for the main PTB application | |
custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 ) | |
application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build() | |
# --- Add Handlers --- | |
application.add_handler(CommandHandler("start", start)) | |
application.add_handler(CommandHandler("help", help_command)) | |
# Use a filter that catches messages containing URL entities | |
url_filter = filters.Entity("url") | filters.Entity("text_link") | |
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND & url_filter, handle_potential_url)) | |
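    # The handler above matches plain-text messages containing either a raw URL entity
    # (e.g. "https://example.com") or a text_link entity (hyperlinked text); commands are excluded by ~filters.COMMAND.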
# Handler for button clicks (summary type selection) | |
application.add_handler(CallbackQueryHandler(handle_summary_type_callback)) | |
# Error handler | |
application.add_error_handler(error_handler) | |
logger.info("Telegram application handlers configured."); return application | |
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN | |
logger.info("ASGI Lifespan: Startup initiated..."); | |
if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.") | |
bot_setup_successful = False | |
webhook_set = False | |
try: | |
ptb_app = await setup_bot_config() | |
await ptb_app.initialize() | |
bot_info = await ptb_app.bot.get_me() | |
logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})") | |
bot_setup_successful = True # Mark bot setup as successful here | |
# --- Webhook Setup --- | |
# Check and delete existing webhook first | |
current_webhook_info = await ptb_app.bot.get_webhook_info() | |
if current_webhook_info and current_webhook_info.url: | |
logger.info(f"Found existing webhook: {current_webhook_info.url}. Attempting to delete...") | |
try: | |
if await ptb_app.bot.delete_webhook(drop_pending_updates=True): | |
logger.info("Existing webhook deleted successfully.") | |
else: | |
# API returned False, might not be critical but worth noting | |
logger.warning("Attempt to delete existing webhook returned False from API.") | |
except Exception as e: | |
logger.warning(f"Could not delete existing webhook (Error: {e}). Proceeding with setting new webhook.", exc_info=True) | |
await asyncio.sleep(1) # Short delay after potential delete | |
# Determine webhook URL (assuming deployment provides SPACE_HOST) | |
space_host = os.environ.get("SPACE_HOST") | |
if not space_host: | |
logger.critical("SPACE_HOST environment variable not found. Cannot set webhook.") | |
raise RuntimeError("SPACE_HOST environment variable missing.") | |
webhook_path = "/webhook" # Matches the route defined later | |
# Ensure correct protocol and clean host formatting | |
protocol = "https" | |
host = space_host.split('://')[-1].rstrip('/') # Remove trailing slashes | |
full_webhook_url = f"{protocol}://{host}{webhook_path}" | |
logger.info(f"Calculated webhook URL: {full_webhook_url}") | |
# Set the new webhook | |
set_webhook_args = { | |
"url": full_webhook_url, | |
"allowed_updates": Update.ALL_TYPES, # Receive all update types | |
"drop_pending_updates": True # Ignore updates while bot was down | |
} | |
if WEBHOOK_SECRET: | |
set_webhook_args["secret_token"] = WEBHOOK_SECRET | |
logger.info("Webhook secret token will be used.") | |
else: | |
logger.info("No webhook secret token configured.") | |
# Give network/DNS a moment before setting | |
await asyncio.sleep(1.5) | |
logger.info(f"Attempting to set webhook to: {full_webhook_url} with args: {set_webhook_args}") | |
await ptb_app.bot.set_webhook(**set_webhook_args) | |
# Verify webhook setup | |
await asyncio.sleep(1) # Allow time for info propagation | |
new_webhook_info = await ptb_app.bot.get_webhook_info() | |
if new_webhook_info.url == full_webhook_url: | |
logger.info(f"Webhook successfully set: URL='{new_webhook_info.url}', Secret Token Set={bool(WEBHOOK_SECRET)}") | |
webhook_set = True | |
else: | |
logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', but GET response shows '{new_webhook_info.url}'. Check firewall/proxy/platform settings.") | |
# Decide whether to raise an error or try to continue | |
# For now, let's raise an error as webhook is critical | |
raise RuntimeError("Failed to verify webhook URL after setting.") | |
# Start the PTB application processing | |
await ptb_app.start() | |
logger.info("PTB Application started and polling for updates via webhook.") | |
logger.info("ASGI Lifespan: Startup complete."); yield # Application runs here | |
except Exception as startup_err: | |
logger.critical(f"Application startup failed: {startup_err}", exc_info=True) | |
# Attempt cleanup even if startup failed partially | |
if ptb_app and bot_setup_successful: | |
if ptb_app.running: | |
try: await ptb_app.stop() | |
except Exception as stop_err: logger.error(f"Error stopping PTB app during failed startup: {stop_err}") | |
# Try to delete webhook if it was potentially set | |
if webhook_set: | |
try: | |
logger.info("Attempting to delete webhook due to startup failure...") | |
await ptb_app.bot.delete_webhook(drop_pending_updates=True) | |
logger.info("Webhook deleted during failed startup cleanup.") | |
except Exception as del_wh_err: logger.error(f"Failed to delete webhook during failed startup cleanup: {del_wh_err}") | |
try: await ptb_app.shutdown() | |
except Exception as shutdown_err: logger.error(f"Error shutting down PTB app during failed startup: {shutdown_err}") | |
raise # Re-raise the original startup error | |
finally: | |
# --- Shutdown Logic --- | |
logger.info("ASGI Lifespan: Shutdown initiated...") | |
if ptb_app and bot_setup_successful: | |
# Stop PTB app first | |
if ptb_app.running: | |
logger.info("Stopping PTB Application processing...") | |
try: await ptb_app.stop() | |
except Exception as e: logger.error(f"Error stopping PTB application: {e}") | |
else: logger.info("PTB Application was not running.") | |
# Delete webhook before shutting down fully | |
try: | |
logger.info("Attempting to delete webhook on shutdown...") | |
if ptb_app.bot and hasattr(ptb_app.bot, 'delete_webhook'): | |
# Check if webhook is actually set before trying to delete | |
current_wh_info = await ptb_app.bot.get_webhook_info() | |
if current_wh_info and current_wh_info.url: | |
if await ptb_app.bot.delete_webhook(drop_pending_updates=True): | |
logger.info("Webhook deleted successfully on shutdown.") | |
else: | |
logger.warning("Failed to delete webhook on shutdown (API returned False).") | |
else: | |
logger.info("No webhook was set, skipping deletion.") | |
else: | |
logger.warning("Cannot delete webhook: Bot object unavailable or doesn't support delete_webhook.") | |
except Exception as e: | |
logger.warning(f"Could not delete webhook during shutdown: {e}", exc_info=False) | |
# Shutdown PTB application resources | |
logger.info("Shutting down PTB Application resources...") | |
try: await ptb_app.shutdown() | |
except Exception as e: logger.error(f"Error during PTB application shutdown: {e}") | |
logger.info("PTB Application shut down.") | |
else: | |
logger.info("PTB app not fully initialized or setup failed. Skipping PTB shutdown steps.") | |
logger.info("ASGI Lifespan: Shutdown complete.") | |
async def health_check(request: Request) -> PlainTextResponse: | |
"""Simple health check endpoint.""" | |
global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled | |
global _urltotext_key_exists, _rapidapi_key_exists, SUPADATA_API_KEY, _crawl4ai_primary_scrape_enabled | |
bot_status = "Not Initialized"; bot_username = "N/A" | |
if ptb_app and ptb_app.bot: | |
try: | |
# Check if the application is running (processing updates) | |
app_running = ptb_app.running | |
# Try to get bot info regardless of running state if bot object exists | |
bot_info = await ptb_app.bot.get_me() | |
bot_username = f"@{bot_info.username}" if bot_info and bot_info.username else "Info Fetch Error" | |
if app_running: | |
bot_status = "Running" | |
else: | |
# If initialized but not running (e.g., during startup/shutdown) | |
bot_status = "Initialized (Not Processing Updates)" | |
except (TimedOut, NetworkError) as net_err: | |
bot_status = f"Network Error checking status: {type(net_err).__name__}" | |
bot_username = "N/A (Network Error)" | |
logger.warning(f"Health check: Network error getting bot info: {net_err}") | |
except Exception as e: | |
bot_status = f"Error checking status: {type(e).__name__}" | |
bot_username = "N/A (Error)" | |
logger.warning(f"Health check: Error getting bot info: {e}", exc_info=False) | |
elif ptb_app: | |
bot_status = "Initialized (Bot object missing?)" | |
bot_username = "N/A" | |
else: | |
bot_status = "Not Initialized" | |
bot_username = "N/A" | |
# Construct the response string | |
response_lines = [ | |
f"TG Bot Summariser - Status: {bot_status} ({bot_username})", | |
"--- Summarization ---", | |
f"Primary Model (Gemini): {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}", | |
f"Fallback Model (OpenRouter): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}", | |
"--- YouTube Transcripts ---", | |
"Primary (Lib): Enabled", | |
f"Fallback 1 (Supadata): {'Enabled' if SUPADATA_API_KEY else 'Disabled (Key Missing)'}", | |
f"Fallback 2 (Apify Actor): {APIFY_ACTOR_ID if _apify_token_exists else 'DISABLED (Token Missing)'}", | |
"--- Website Scraping ---", | |
f"Primary (Crawl4AI): {'Enabled' if _crawl4ai_primary_scrape_enabled else 'DISABLED (Library/Driver Missing?)'}", | |
"Fallback 1 (Direct+BS4): Enabled", | |
f"Fallback 2 (urltotext): {'Enabled' if _urltotext_key_exists else 'Disabled (Key Missing)'}", | |
f"Fallback 3/4 (RapidAPI): {'Enabled' if _rapidapi_key_exists else 'Disabled (Key Missing)'}", | |
f"Fallback 5/6 (Apify Actors): {'Enabled' if _apify_token_exists else 'Disabled (Token Missing)'}" | |
] | |
return PlainTextResponse("\n".join(response_lines)) | |
async def telegram_webhook(request: Request) -> Response: | |
"""Handles incoming updates from Telegram.""" | |
global ptb_app, WEBHOOK_SECRET # Ensure ptb_app is accessible | |
# --- Basic Checks --- | |
if not ptb_app: | |
logger.error("Webhook received but PTB application is not initialized.") | |
return PlainTextResponse('Bot application not initialized', status_code=503) # Service Unavailable | |
if not ptb_app.bot: | |
logger.error("Webhook received but PTB bot object is not available.") | |
return PlainTextResponse('Bot object not available', status_code=503) | |
if not ptb_app.running: | |
logger.warning("Webhook received but PTB application is not running (likely startup/shutdown).") | |
# Return 200 OK to Telegram to prevent retries, but log the warning. | |
return PlainTextResponse('Bot not actively processing', status_code=200) | |
# --- Security Check (Secret Token) --- | |
if WEBHOOK_SECRET: | |
token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token") | |
if not token_header: | |
logger.warning("Webhook received request MISSING secret token header, but one is configured.") | |
return Response(content="Forbidden: Missing secret token", status_code=403) | |
if token_header != WEBHOOK_SECRET: | |
logger.warning(f"Webhook received INVALID secret token. Header: '{token_header[:5]}...'") | |
return Response(content="Forbidden: Invalid secret token", status_code=403) | |
# If token matches, proceed | |
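        # Telegram echoes the value passed as secret_token to set_webhook in the
        # "X-Telegram-Bot-Api-Secret-Token" header of every webhook request; that header is what is compared above.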
# --- Process Update --- | |
try: | |
update_data = await request.json() | |
        update = Update.de_json(data=update_data, bot=ptb_app.bot)
        if update is None:
            logger.error("Webhook payload did not parse into a Telegram Update object.")
            return PlainTextResponse('Bad Request: Invalid update payload', status_code=400)
        logger.debug(f"Processing update_id: {update.update_id} via webhook")
# Use PTB's built-in update processing queue | |
await ptb_app.process_update(update) | |
# Return 200 OK to Telegram quickly after queuing the update | |
return Response(status_code=200) | |
except json.JSONDecodeError: | |
logger.error("Webhook received invalid JSON data.") | |
return PlainTextResponse('Bad Request: Invalid JSON', status_code=400) | |
except Exception as e: | |
# Log the error, but return 200 OK to prevent Telegram from resending the faulty update | |
logger.error(f"Error processing webhook update: {e}", exc_info=True) | |
return Response(status_code=200) | |
# --- Starlette App Definition --- | |
app = Starlette( | |
debug=False, # Set to False for production | |
lifespan=lifespan, | |
routes=[ | |
Route("/", endpoint=health_check, methods=["GET"]), | |
Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), | |
] | |
) | |
logger.info("Starlette ASGI application created with health check ('/') and Telegram webhook ('/webhook') routes.") | |
# --- Development Server & Playwright Check --- | |
if __name__ == '__main__': | |
import uvicorn | |
logger.warning("Running in development mode using Uvicorn directly - NOT recommended for production!") | |
# Check for Playwright installation on startup in dev mode | |
playwright_installed = False | |
try: | |
from playwright.async_api import async_playwright | |
playwright_installed = True | |
logger.info("Playwright library found.") | |
# Optional: Add playwright install command here if needed for dev | |
# Consider running `playwright install --with-deps` manually in your dev env | |
except ImportError: | |
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") | |
logger.critical("Playwright library not found. Crawl4AI (Primary Scraper) WILL FAIL.") | |
logger.critical("Install it: pip install playwright") | |
logger.critical("Then install browsers: playwright install --with-deps") | |
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") | |
# Check Crawl4AI explicitly | |
if not _crawl4ai_available: | |
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") | |
logger.critical("Crawl4AI library not found. Primary Scraper WILL BE DISABLED.") | |
logger.critical("Install it: pip install crawl4ai") | |
logger.critical("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") | |
elif not playwright_installed: | |
logger.warning("Crawl4AI is installed, but Playwright is missing. Crawl4AI will likely fail without Playwright drivers.") | |
# Get log level and port from environment or use defaults | |
log_level = os.environ.get("LOGGING_LEVEL", "info").lower() | |
local_port = int(os.environ.get('PORT', 8080)) # Use PORT env var, default 8080 | |
# Run Uvicorn | |
uvicorn.run( | |
"__main__:app", | |
host='0.0.0.0', # Listen on all interfaces | |
port=local_port, | |
log_level=log_level, | |
reload=True # Enable auto-reload for development | |
) |