Spaces:

fmab777
/

telegram-summary-bot

Running

App Files Community

fmab777 committed 28 days ago

Commit

3ac7b5f

verified ·

1 Parent(s): 2ebc056

Update main.py

Browse files

Files changed (1) hide show

main.py +502 -768

main.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# main.py (Applying fixes for apparent_encoding, bot cleanup, and Apify actor name)
 import os
 import re
 import logging
@@ -27,32 +27,20 @@ from telegram.ext import (
 )
 from telegram.constants import ParseMode
 from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError
-from telegram.request import HTTPXRequest, BaseRequest # Import BaseRequest for type hinting
 # --- Other Libraries ---
-import httpx
 from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
-# Make requests optional if only used for sync fallback (currently not)
-# import requests
 from bs4 import BeautifulSoup
 from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
-# Optional: Import lxml if installed (usually faster parsing)
 try:
     import lxml
     DEFAULT_PARSER = 'lxml'
 except ImportError:
     DEFAULT_PARSER = 'html.parser'
-_apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
-if _apify_token_exists:
-    from apify_client import ApifyClient
-    from apify_client.consts import ActorJobStatus
-    from apify_client.errors import ApifyApiError # Import specific error
-else:
-    ApifyClient = None # type: ignore
-    ApifyApiError = None # type: ignore
 # --- Logging Setup ---
 logging.basicConfig(
@@ -60,7 +48,7 @@ logging.basicConfig(
     level=logging.INFO
 )
 logging.getLogger("httpx").setLevel(logging.WARNING)
-if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
 logging.getLogger("telegram.ext").setLevel(logging.INFO)
 logging.getLogger('telegram.bot').setLevel(logging.INFO)
 logging.getLogger("urllib3").setLevel(logging.INFO)
@@ -73,11 +61,10 @@ logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
 # --- Global variable for PTB app ---
 ptb_app: Optional[Application] = None
-# --- Environment Variable Loading ---
-logger.info("Attempting to load secrets...")
 def get_secret(secret_name):
     value = os.environ.get(secret_name)
-    # Avoid logging full length of very long secrets like Supabase keys
     log_length = min(len(value), 8) if value else 0
     status = "Found" if value else "Not Found"
     logger.info(f"Secret '{secret_name}': {status} (Value starts with: {value[:log_length]}...)")
@@ -85,904 +72,651 @@ def get_secret(secret_name):
 TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
 OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
-URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
 SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
-APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
-WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET') # Added for webhook security
-OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "anthropic/claude-3.5-sonnet")
-APIFY_ACTOR_NAME = os.environ.get("APIFY_ACTOR_NAME", "pocesar/youtube-scraper") # Use env var or default
-logger.info("Secret loading attempt finished.")
 logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
-if _apify_token_exists:
-    logger.info(f"Using Apify Actor: {APIFY_ACTOR_NAME}")
-# --- Retry Decorator for Bot Operations ---
-@retry(
-    stop=stop_after_attempt(4),
-    wait=wait_exponential(multiplier=1, min=2, max=15),
-    retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
-    before_sleep=before_sleep_log(logger, logging.WARNING),
-    reraise=True
-)
 async def retry_bot_operation(func, *args, **kwargs):
-    """Wrapper to retry bot operations with exponential backoff."""
-    try:
-        return await func(*args, **kwargs)
     except BadRequest as e:
-        # Added specific check for common, non-fatal BadRequests
-        ignore_errors = [
-             "message is not modified",
-             "query is too old",
-             "message to edit not found",
-             "chat not found", # Might indicate user blocked bot, non-retryable
-             "bot was blocked by the user",
-        ]
-        if any(err in str(e).lower() for err in ignore_errors):
-            logger.warning(f"Ignoring non-critical BadRequest during bot operation: {e}")
-            return None # Indicate no action needed or failed benignly
-        logger.error(f"Potentially critical BadRequest during bot operation: {e}")
-        raise # Reraise other BadRequests (might be retryable by tenacity)
-    except TelegramError as e:
-        logger.warning(f"TelegramError during bot operation (will retry if applicable): {e}")
-        raise
-    except Exception as e:
-        logger.error(f"Unexpected error during bot operation: {e}", exc_info=True)
-        raise
-# --- Helper Functions ---
 def is_youtube_url(url):
-    """Checks if the URL is a valid YouTube video or shorts URL."""
-    youtube_regex = re.compile(
-        r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
-        r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
-        r'([\w-]{11})'
-        r'(?:\S+)?',
-        re.IGNORECASE)
-    match = youtube_regex.search(url)
-    logger.debug(f"is_youtube_url check for '{url}': {'Match found' if match else 'No match'}")
-    return bool(match)
 def extract_youtube_id(url):
-    """Extracts the YouTube video ID from a URL."""
-    youtube_regex = re.compile(
-        r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
-        r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
-        r'([\w-]{11})'
-        r'(?:\S+)?',
-        re.IGNORECASE)
     match = youtube_regex.search(url)
-    if match:
-        video_id = match.group(1)
-        logger.debug(f"Extracted YouTube ID '{video_id}' from URL: {url}")
-        return video_id
-    else:
-        logger.warning(f"Could not extract YouTube ID from URL: {url}")
-        return None
-# --- Content Fetching Functions ---
-# Using httpx for async requests
-async def fetch_url_content(url: str, timeout: int = 20) -> Optional[str]:
-    """Fetches content from a URL using httpx asynchronously."""
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', # Updated UA
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
-        'Accept-Language': 'en-US,en;q=0.9',
-        'Connection': 'keep-alive',
     }
     try:
-        async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers, http2=True) as client: # Enable HTTP/2
             response = await client.get(url)
             response.raise_for_status()
-            # *** FIX: Use response.encoding or response.charset_encoding ***
-            # response.encoding will try to decode based on headers/content
-            # If it fails, default to utf-8
-            try:
-                 # Accessing .text forces encoding detection
-                 content = response.text
-                 logger.debug(f"Detected encoding for {url}: {response.encoding}")
-                 return content
-            except UnicodeDecodeError:
-                 logger.warning(f"UnicodeDecodeError for {url} with encoding {response.encoding}. Trying raw bytes with utf-8.")
-                 # Fallback: read bytes and decode utf-8 ignoring errors
-                 return response.content.decode('utf-8', errors='ignore')
-            except Exception as e:
-                 logger.error(f"Error decoding response for {url}: {e}")
-                 return None # Cannot decode reliably
-    except httpx.HTTPStatusError as e:
-        logger.error(f"HTTP error fetching {url}: {e.response.status_code} - {e}")
-    except httpx.ConnectError as e:
-         # Catch specific connection errors like DNS failures
-        logger.error(f"Connection error fetching {url}: {e}")
-    except httpx.TimeoutException as e:
-        logger.error(f"Timeout error fetching {url}: {e}")
-    except httpx.RequestError as e:
-        logger.error(f"Request error fetching {url}: {e}")
-    except Exception as e:
-        logger.error(f"Unexpected error fetching {url}: {e}", exc_info=True)
     return None
 async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
-    """Fetches YouTube transcript using Supadata API."""
-    if not api_key: return None
-    api_url = f"https://api.supadata.net/youtube/transcript?video_id={video_id}"
-    headers = {'X-API-Key': api_key, 'Accept': 'application/json'}
-    logger.info(f"Attempting transcript fetch via Supadata for {video_id}")
     try:
-        # Note: If CERTIFICATE_VERIFY_FAILED persists, it's an issue with api.supadata.net's cert.
-        # Do NOT disable verification (verify=False) unless absolutely necessary and understood.
         async with httpx.AsyncClient(timeout=30.0) as client:
-            response = await client.get(api_url, headers=headers)
-            response.raise_for_status()
-            data = response.json()
-            if data and isinstance(data, list) and data[0].get("text"):
-                transcript = " ".join([item["text"] for item in data if "text" in item])
-                logger.info(f"Supadata transcript fetched successfully for {video_id} (length: {len(transcript)})")
-                return transcript
-            else:
-                logger.warning(f"Supadata response format unexpected or empty for {video_id}: {data}")
-                return None
-    except httpx.ConnectError as e:
-         # Log SSL verification errors specifically if they occur
-        if "CERTIFICATE_VERIFY_FAILED" in str(e):
-             logger.error(f"Supadata API SSL certificate verification failed for {video_id}: {e}. This is likely an issue with api.supadata.net's certificate.")
-        else:
-             logger.error(f"Supadata API connection error for {video_id}: {e}")
-    except httpx.HTTPStatusError as e:
-        logger.error(f"Supadata API HTTP error for {video_id}: {e.response.status_code} - {e}")
-    except Exception as e:
-        logger.error(f"Error fetching transcript via Supadata for {video_id}: {e}", exc_info=True)
-    return None
-async def get_transcript_via_apify(video_id: str, api_token: str) -> Optional[str]:
-    """Fetches YouTube transcript using Apify YouTube Scraper Actor."""
-    global APIFY_ACTOR_NAME # Use the globally configured/default actor name
-    if not ApifyClient or not api_token: return None
-    logger.info(f"Attempting transcript fetch via Apify (Actor: {APIFY_ACTOR_NAME}) for {video_id}")
     try:
-        client = ApifyClient(api_token)
-        # *** FIX: Use the correct actor name ***
-        actor = client.actor(APIFY_ACTOR_NAME)
-        if not actor:
-             logger.error(f"Could not find Apify actor: {APIFY_ACTOR_NAME}")
-             return None
-        actor_run = await asyncio.to_thread(
-             actor.call, # Run blocking call in thread
-             run_input={
-                 "startUrls": [{"url": f"https://www.youtube.com/watch?v={video_id}"}], # Use correct input format if needed
-                 "maxResultStreams": 0,
-                 "maxResults": 1, # Only need info for one video
-                 "maxResultCommentStreams": 0,
-                 "proxyConfiguration": {"useApifyProxy": True},
-                 "subtitles": True, # Explicitly request subtitles/transcript
-                 "maxDurationMinutes": 0, # No duration limit
-                 "skipComments": True,
-                 # Check actor docs for exact input schema
-             },
-             timeout_secs=120, # Timeout for the call itself
-             wait_secs=120 # Timeout for waiting for run completion
-        )
-        if not actor_run or 'defaultDatasetId' not in actor_run:
-            logger.warning(f"Apify actor run did not return expected dataset ID for {video_id}. Run details: {actor_run}")
-            return None
-        logger.info(f"Apify actor run started/retrieved for {video_id}. Dataset ID: {actor_run['defaultDatasetId']}")
-        # Fetch results from the dataset
-        dataset = client.dataset(actor_run["defaultDatasetId"])
-        # Run list_items in thread as it can be blocking I/O
-        dataset_page = await asyncio.to_thread(dataset.list_items, limit=5) # Limit items fetched initially
-        if dataset_page and dataset_page.items:
-            for item in dataset_page.items:
-                 # Apify output structure can vary; adapt as needed
-                 transcript_text = item.get('transcript') # Check common keys
-                 if not transcript_text and 'subtitles' in item: # Check alternative
-                     if isinstance(item['subtitles'], list) and len(item['subtitles']) > 0:
-                         transcript_text = " ".join(line.get('text', '') for line in item['subtitles'][0].get('lines', []))
-                     elif isinstance(item['subtitles'], str): # Sometimes it's just a string
-                          transcript_text = item['subtitles']
-                 if transcript_text and isinstance(transcript_text, str) and transcript_text.strip():
-                     logger.info(f"Apify transcript fetched successfully for {video_id} (length: {len(transcript_text)})")
-                     return transcript_text.strip()
-            logger.warning(f"Apify run completed for {video_id}, but no transcript found in dataset items.")
-        else:
-            logger.warning(f"Apify run completed for {video_id}, but dataset was empty or inaccessible.")
-    except ApifyApiError as e:
-         # Catch specific Apify errors like "Actor not found"
-         logger.error(f"Apify API error fetching transcript for {video_id} (Actor: {APIFY_ACTOR_NAME}): {e}")
-    except Exception as e:
-        logger.error(f"Unexpected error fetching transcript via Apify for {video_id}: {e}", exc_info=True)
-    return None
-async def get_youtube_transcript(video_id: str, url: str, supadata_key: Optional[str], apify_token: Optional[str]) -> Optional[str]:
-    """Tries different methods to get a YouTube transcript."""
-    transcript = None
-    # 1. Try Supadata API (if key exists)
-    if supadata_key:
-        transcript = await get_transcript_via_supadata(video_id, supadata_key)
-        if transcript: return transcript
-    # 2. Try youtube-transcript-api (Direct method)
-    logger.info(f"Attempting transcript fetch via youtube-transcript-api for {video_id}")
     try:
-        transcript_list = await asyncio.to_thread(YouTubeTranscriptApi.get_transcript, video_id)
-        transcript = " ".join([item['text'] for item in transcript_list])
-        logger.info(f"youtube-transcript-api transcript fetched successfully for {video_id} (length: {len(transcript)})")
-        return transcript
-    except (TranscriptsDisabled, NoTranscriptFound):
-        logger.warning(f"Transcripts disabled or unavailable via youtube-transcript-api for {video_id}.")
     except Exception as e:
-        logger.error(f"Error using youtube-transcript-api for {video_id}: {e}")
-    # 3. Try Apify (if token exists and other methods failed)
-    if not transcript and apify_token:
-         transcript = await get_transcript_via_apify(video_id, apify_token)
-         if transcript: return transcript
-    logger.warning(f"Failed to retrieve transcript for YouTube video {video_id} using all available methods.")
-    return None
-async def get_website_content_via_requests(url: str) -> Optional[str]:
-    """Fetches and extracts main text content from a website using BeautifulSoup."""
-    logger.info(f"Attempting website scrape via requests/BeautifulSoup for: {url}")
-    html_content = await fetch_url_content(url)
-    if not html_content:
-        return None
     try:
         def parse_html(content):
             # Use lxml if available, otherwise html.parser
             soup = BeautifulSoup(content, DEFAULT_PARSER)
-            for script_or_style in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "iframe"]):
-                script_or_style.decompose()
-            # Consider targeting specific elements like <article>, <main>, .post-content etc.
-            main_content = soup.find('article') or soup.find('main') or soup.body
-            if not main_content: main_content = soup # Fallback to whole soup if no main tags
-            text = main_content.get_text(separator='\n', strip=True)
-            lines = (line.strip() for line in text.splitlines())
-            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
-            text = '\n'.join(chunk for chunk in chunks if chunk)
             return text
         text_content = await asyncio.to_thread(parse_html, html_content)
-        if text_content and len(text_content) > 100:
-            logger.info(f"Successfully scraped content via requests/BeautifulSoup for {url} (length: {len(text_content)})")
-            return text_content
-        else:
-            logger.warning(f"Scraping via requests/BeautifulSoup for {url} yielded minimal content (length: {len(text_content) if text_content else 0}).")
-            return None
-    except Exception as e:
-        logger.error(f"Error parsing website content with BeautifulSoup for {url}: {e}", exc_info=True)
-        return None
-async def get_website_content_via_urltotext_api(url: str, api_key: str) -> Optional[str]:
-    """Fetches website content using the UrlToText API."""
-    if not api_key: return None
-    api_endpoint = "https://api.urltotext.ai/text"
-    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-    payload = {"url": url, "text_only": True}
-    logger.info(f"Attempting website content fetch via UrlToText API for: {url}")
     try:
-        async with httpx.AsyncClient(timeout=45.0) as client:
             response = await client.post(api_endpoint, headers=headers, json=payload)
-            response.raise_for_status()
-            data = response.json()
-            if "text" in data and data["text"]:
-                content = data["text"]
-                logger.info(f"Successfully fetched content via UrlToText API for {url} (length: {len(content)})")
-                return content
-            else:
-                logger.warning(f"UrlToText API response did not contain text for {url}. Response: {data}")
-                return None
-    except httpx.ConnectError as e:
-        # Catch DNS error specifically if needed, but general ConnectError covers it
-        logger.error(f"UrlToText API connection error for {url}: {e}. Check network/DNS.")
-    except httpx.HTTPStatusError as e:
-        logger.error(f"UrlToText API HTTP error for {url}: {e.response.status_code} - {e}")
-    except Exception as e:
-        logger.error(f"Error fetching content via UrlToText API for {url}: {e}", exc_info=True)
-    return None
-# --- Summarization Function ---
-async def generate_summary(content: str, summary_type: str, api_key: Optional[str]) -> str:
-    """Generates a summary using OpenRouter API."""
-    global OPENROUTER_MODEL # Use the globally configured/default model
-    if not api_key:
-        return "Error: OpenRouter API key is not configured."
-    if not content:
-        return "Error: No content provided to summarize."
-    if len(content) < 50:
-        return "The provided content is too short to summarize effectively."
-    max_chars = 100000
-    if len(content) > max_chars:
-        logger.warning(f"Content length ({len(content)}) exceeds max_chars ({max_chars}), truncating.")
-        content = content[:max_chars]
-    prompt_template = """
-Please summarize the following text. The summary should capture the key points and main ideas accurately and concisely.
-Provide the summary in {format_style} format.
-Text to summarize:
----
-{text}
----
-Summary ({format_style}):
-"""
-    format_style = "a concise paragraph" if summary_type == "paragraph" else "bullet points (using * or - for each point)"
-    prompt = prompt_template.format(text=content, format_style=format_style)
-    logger.info(f"Sending request to OpenRouter (Model: {OPENROUTER_MODEL}) for {summary_type} summary.")
     try:
-        async with httpx.AsyncClient(timeout=120.0) as client:
-            response = await client.post(
-                url="https://openrouter.ai/api/v1/chat/completions",
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json",
-                    # Optional: Add custom site identifier
-                    # "HTTP-Referer": "YOUR_SITE_URL",
-                    # "X-Title": "Telegram Summarizer Bot"
-                },
-                json={
-                    "model": OPENROUTER_MODEL,
-                    "messages": [{"role": "user", "content": prompt}],
-                    "max_tokens": 1024, # Adjust based on expected summary length
-                    # Optional: Add temperature, top_p etc. if needed
-                },
-            )
-            response.raise_for_status()
-            data = response.json()
-            if data.get("choices") and len(data["choices"]) > 0:
-                summary = data["choices"][0].get("message", {}).get("content", "").strip()
-                if summary:
-                    logger.info(f"Summary generated successfully (length: {len(summary)})")
-                    # More robust Markdown escaping needed for PTB's MarkdownV2
-                    # For simple Markdown, basic escaping might suffice, but V2 is safer
-                    # summary = escape_markdown(summary) # Implement or import escape_markdown if using V2
-                    # Basic escaping for ParseMode.MARKDOWN:
-                    summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
-                    return summary
-                else:
-                    logger.error("OpenRouter response successful, but summary content is empty.")
-                    return "Sorry, the AI generated an empty summary. Please try again."
-            else:
-                # Log the error details if available in the response
-                error_details = data.get("error")
-                logger.error(f"OpenRouter response format unexpected or error: {error_details or data}")
-                return f"Sorry, I received an unexpected response or error from the summarization service: {error_details}"
-    except httpx.HTTPStatusError as e:
-        error_body = ""
-        try: error_body = e.response.text
-        except Exception: pass
-        logger.error(f"OpenRouter API HTTP error: {e.response.status_code} - {e}. Response body: {error_body}")
-        return f"Sorry, there was an error communicating with the summarization service (HTTP {e.response.status_code})."
-    except Exception as e:
-        logger.error(f"Error generating summary via OpenRouter: {e}", exc_info=True)
-        return "Sorry, an unexpected error occurred while generating the summary."
-# --- Background Task Processing ---
 async def process_summary_task(
-    user_id: int,
-    chat_id: int,
-    message_id_to_edit: Optional[int],
-    url: str,
-    summary_type: str,
-    bot_token: str
 ) -> None:
-    """Handles the actual fetching and summarization in a background task."""
     task_id = f"{user_id}-{message_id_to_edit or 'new'}"
     logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
-    # Create a new request handler and bot instance for this task
     background_request: Optional[BaseRequest] = None
     bot: Optional[Bot] = None
-    try:
-        background_request = HTTPXRequest(
-            connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0, http_version="1.1"
-        )
         bot = Bot(token=bot_token, request=background_request)
-    except Exception as e:
-         logger.critical(f"[Task {task_id}] Failed to create background bot instance: {e}", exc_info=True)
-         # Cannot proceed without a bot instance
-         return # Or raise? Silently failing might hide issues.
-    content = None
-    user_feedback_message = None
-    success = False
-    final_summary = ""
     status_message_id = message_id_to_edit
     try:
-        # --- Inform User Processing Has Started ---
-        processing_message_text = f"⏳ Working on your '{summary_type}' summary for:\n`{url}`\n\n_(Fetching & summarizing...)_"
         if status_message_id:
             try:
-                await retry_bot_operation(
-                    bot.edit_message_text, chat_id=chat_id, message_id=status_message_id,
-                    text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None
-                )
-                logger.debug(f"[Task {task_id}] Successfully edited message {status_message_id} to 'Processing'")
             except Exception as e:
-                logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Will send a new status message.")
-                status_message_id = None
-        if not status_message_id:
              try:
-                 status_message = await retry_bot_operation(
-                     bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN
-                 )
-                 if status_message: # Check if message was actually sent (retry might return None on ignore)
-                     status_message_id = status_message.message_id
-                     logger.debug(f"[Task {task_id}] Sent new status message {status_message_id}")
-                 else:
-                      logger.error(f"[Task {task_id}] Failed to send new status message after retries.")
-                      raise RuntimeError("Failed to send initial status message")
-             except Exception as e:
-                 logger.error(f"[Task {task_id}] Failed to send new status message: {e}")
-                 raise RuntimeError("Failed to send initial status message") from e
-        # --- Main Content Fetching and Summarization ---
         try:
             await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
-            is_yt = is_youtube_url(url)
-            logger.debug(f"[Task {task_id}] URL is YouTube: {is_yt}")
-            if is_yt:
                 video_id = extract_youtube_id(url)
-                if video_id:
-                    logger.info(f"[Task {task_id}] Fetching YouTube transcript for {video_id}")
-                    content = await get_youtube_transcript(video_id, url, SUPADATA_API_KEY, APIFY_API_TOKEN)
-                    if not content: user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be disabled or unavailable."
-                else: user_feedback_message = "⚠️ Couldn't extract a valid YouTube video ID from the link."
-            else:
-                logger.info(f"[Task {task_id}] Attempting website scrape for: {url}")
-                content = await get_website_content_via_requests(url)
-                if not content and URLTOTEXT_API_KEY:
-                    logger.info(f"[Task {task_id}] Basic scrape failed/insufficient, trying UrlToText API...")
-                    await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
-                    content = await get_website_content_via_urltotext_api(url, URLTOTEXT_API_KEY)
-                if not content: user_feedback_message = "⚠️ Sorry, I couldn't fetch or extract meaningful content from that website."
             if content:
-                logger.info(f"[Task {task_id}] Content fetched (length: {len(content)}). Generating '{summary_type}' summary.")
                 await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
-                final_summary = await generate_summary(content, summary_type, OPENROUTER_API_KEY)
                 if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
-                    user_feedback_message = f"⚠️ {final_summary}"
-                else: success = True
-            # If content fetching failed, user_feedback_message is already set
         except Exception as e:
-            logger.error(f"[Task {task_id}] Error during content fetching or summarization: {e}", exc_info=True)
-            user_feedback_message = "❌ An unexpected error occurred while processing your request."
-        # --- Send Final Result or Error ---
-        if success and final_summary:
-            max_length = 4096
-            summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
-            await retry_bot_operation(
-                bot.send_message, chat_id=chat_id, text=summary_parts[0],
-                parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True}
-            )
-            for part in summary_parts[1:]:
-                 await asyncio.sleep(0.5)
-                 await retry_bot_operation(
-                     bot.send_message, chat_id=chat_id, text=part,
-                     parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True}
-                 )
-            logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
-        elif user_feedback_message:
-            logger.warning(f"[Task {task_id}] Sending feedback/error message: {user_feedback_message}")
-            await retry_bot_operation(
-                bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True}
-            )
-        else:
-            logger.error(f"[Task {task_id}] Reached end of task without success or specific error message.")
-            await retry_bot_operation(
-                bot.send_message, chat_id=chat_id, text="❓ Something went wrong, but no specific error was identified.",
-                link_preview_options={'is_disabled': True}
-            )
-    except Exception as e:
-        logger.critical(f"[Task {task_id}] Critical error within task processing: {e}", exc_info=True)
         try:
-            # Use the bot instance created at the start of the task if available
-            if bot:
-                 await retry_bot_operation(
-                     bot.send_message, chat_id=chat_id,
-                     text="❌ A critical internal error occurred. Please report this if it persists."
-                 )
-            else:
-                 logger.error("[Task ??] Cannot send critical error message: Bot instance not available.")
-        except Exception:
-            logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
     finally:
-        # --- Clean up Status Message ---
-        if status_message_id and bot: # Ensure bot exists before trying to delete
             try:
-                await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=status_message_id)
-                logger.debug(f"[Task {task_id}] Deleted status message {status_message_id}")
-            except Exception as e:
-                # Log benignly if deletion fails (e.g., message already deleted)
-                logger.warning(f"[Task {task_id}] Failed to delete status message {status_message_id}: {e}")
-        # --- Clean up Background Bot's HTTPX Client ---
-        # *** FIX: Correct way to close client for manually created Bot ***
         if background_request and hasattr(background_request, '_client') and background_request._client:
-             try:
-                 await background_request._client.aclose()
-                 logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
-             except Exception as e:
-                 logger.warning(f"[Task {task_id}] Error closing background bot's HTTPX client: {e}")
-        else:
-             logger.debug(f"[Task {task_id}] Background bot's HTTPX client already closed or not found.")
         logger.info(f"[Task {task_id}] Task completed. Success: {success}")
-# --- Telegram Bot Handlers (Mostly Unchanged) ---
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-    """Handles the /start command."""
-    user = update.effective_user
     if not user or not update.message: return
-    logger.info(f"User {user.id} initiated /start.")
-    mention = user.mention_html()
-    start_message = (
-        f"👋 Hello {mention}!\n\n"
-        "I can summarise YouTube videos or web articles for you.\n\n"
-        "Just send me a link (URL) and I'll ask you whether you want the summary as a paragraph or bullet points.\n\n"
-        "Type /help for more details."
-    )
-    await update.message.reply_html(start_message)
 async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-    """Handles the /help command."""
     user = update.effective_user
     if not user or not update.message: return
-    logger.info(f"User {user.id} requested /help.")
-    help_text = (
-        "**How to Use Me:**\n"
-        "1. Send me a direct link (URL) to a YouTube video or a web article.\n"
-        "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n"
-        "3. Click the button for your preferred format.\n"
-        "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
-        "**Important Notes:**\n"
-        "- **YouTube:** Transcript availability varies. I try multiple methods.\n"
-        "- **Websites:** I attempt basic scraping and can use UrlToText API (if configured) for complex sites.\n"
-        "- **AI Summaries:** Provided by OpenRouter (using model: `{model}`). Accuracy may vary.\n"
-        "- **Length Limits:** Very long content might be truncated.\n\n"
-        "Just send a link to get started!"
-    ).format(model=OPENROUTER_MODEL) # Show the model being used
     await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
 async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-    """Handles messages containing potential URLs."""
     if not update.message or not update.message.text: return
-    message_text = update.message.text.strip()
-    user = update.effective_user
     if not user: return
-    url_pattern = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
-    match = re.search(url_pattern, message_text)
-    if match:
-        url = match.group(0)
-        url = re.sub(r'[.,!?)\]>]+$', '', url) # Basic cleanup
-        # Further clean URL if needed, e.g., removing tracking params (complex)
-        logger.info(f"User {user.id} sent potential URL: {url}")
-        context.user_data['url_to_summarize'] = url
-        context.user_data['original_message_id'] = update.message.message_id
-        keyboard = [
-            [
-                InlineKeyboardButton("📜 Paragraph", callback_data="paragraph"),
-                InlineKeyboardButton("🔹 Bullet Points", callback_data="points")
-            ]
-        ]
-        reply_markup = InlineKeyboardMarkup(keyboard)
-        await update.message.reply_text(
-            f"✅ Link received:\n`{url}`\n\nChoose your desired summary format:",
-            reply_markup=reply_markup,
-            parse_mode=ParseMode.MARKDOWN,
-            link_preview_options={'is_disabled': True}
-        )
-    elif not message_text.startswith('/'):
-        logger.debug(f"User {user.id} sent non-URL, non-command text: '{message_text[:50]}...'")
-        if "http" in message_text or "www." in message_text or ".com" in message_text or ".org" in message_text or ".net" in message_text:
-             await update.message.reply_text("Hmm, that looks like it might be a link, but please ensure it starts with `http://` or `https://` and is a valid URL.")
 async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-    """Handles button presses for summary type selection."""
     query = update.callback_query
-    if not query or not query.message or not query.from_user:
-        logger.warning("Callback query received without essential data.")
-        if query: await query.answer()
-        return
-    user = query.from_user
-    summary_type = query.data
-    query_id = query.id
-    try:
-        await query.answer()
-        logger.debug(f"Acknowledged callback query {query_id} from user {user.id}")
-    except Exception as e:
-        logger.error(f"Error answering callback query {query_id} from user {user.id}: {e}", exc_info=True)
     url = context.user_data.get('url_to_summarize')
-    message_id_to_edit = query.message.message_id
-    logger.info(f"User {user.id} chose summary type '{summary_type}' for URL associated with message {message_id_to_edit}")
     if not url:
-        logger.warning(f"No URL found in user_data for user {user.id} (callback query {query_id}). Editing message.")
-        try:
-            # Edit the message the button was attached to
-            await query.edit_message_text(text="⚠️ Oops! I couldn't find the link associated with this request. Please send the link again.")
-        except Exception as e:
-            logger.error(f"Failed to edit message to show 'URL not found' error: {e}")
         return
-    context.user_data.pop('url_to_summarize', None)
-    context.user_data.pop('original_message_id', None)
-    if not TELEGRAM_TOKEN:
-         logger.critical("TELEGRAM_TOKEN is missing, cannot start background task!")
-         try:
-             await query.edit_message_text(text="❌ Internal configuration error. Cannot process request.")
-         except Exception: pass
-         return
-    logger.info(f"Scheduling background task for user {user.id}, chat {query.message.chat_id}, message {message_id_to_edit}, type {summary_type}")
-    asyncio.create_task(
-        process_summary_task(
-            user_id=user.id,
-            chat_id=query.message.chat_id,
-            message_id_to_edit=message_id_to_edit,
-            url=url,
-            summary_type=summary_type,
-            bot_token=TELEGRAM_TOKEN
-        ),
-        name=f"SummaryTask-{user.id}-{message_id_to_edit}"
-    )
 async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
     """Log Errors caused by Updates."""
-    # Ignore errors related to background task exceptions that were already handled/logged
-    if isinstance(context.error, AttributeError) and "'Bot' object has no attribute 'session'" in str(context.error):
-         logger.debug(f"Ignoring known cleanup error in error_handler: {context.error}")
          return
     logger.error("Exception while handling an update:", exc_info=context.error)
-    # Example: Inform user on specific, potentially temporary errors
-    # if isinstance(context.error, (NetworkError, TimedOut)):
-    #     try:
-    #         if update and isinstance(update, Update) and update.effective_chat:
-    #             await context.bot.send_message(
-    #                 chat_id=update.effective_chat.id,
-    #                 text="I'm having temporary network issues. Please try again in a moment."
-    #             )
-    #     except Exception as e:
-    #         logger.error(f"Failed to send error notification to user: {e}")
-# --- Bot Setup Function ---
 async def setup_bot_config() -> Application:
     """Configures the PTB Application."""
     logger.info("Configuring Telegram Application...")
-    if not TELEGRAM_TOKEN:
-        raise ValueError("TELEGRAM_TOKEN environment variable not set.")
-    custom_request = HTTPXRequest(
-        connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0, http_version="1.1"
-    )
-    application = (
-        Application.builder()
-        .token(TELEGRAM_TOKEN)
-        .request(custom_request)
-        .build()
-    )
     application.add_handler(CommandHandler("start", start))
     application.add_handler(CommandHandler("help", help_command))
     application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
     application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
     application.add_error_handler(error_handler)
     logger.info("Telegram application handlers configured.")
     return application
-# --- ASGI Lifespan Context Manager ---
 @contextlib.asynccontextmanager
 async def lifespan(app: Starlette):
     """Handles PTB startup and shutdown during ASGI lifespan."""
-    global ptb_app, WEBHOOK_SECRET # Make secret global for access in webhook handler
-    logger.info("ASGI Lifespan: Startup sequence initiated...")
-    if not TELEGRAM_TOKEN:
-        logger.critical("TELEGRAM_TOKEN is not set. Bot cannot start.")
-        raise RuntimeError("Telegram token missing.")
-    bot_info_text = "Bot info not available yet."
     try:
         ptb_app = await setup_bot_config()
         await ptb_app.initialize()
         bot_info = await ptb_app.bot.get_me()
-        bot_info_text = f"@{bot_info.username} (ID: {bot_info.id})"
-        logger.info(f"Bot initialized: {bot_info_text}")
         current_webhook_info = await ptb_app.bot.get_webhook_info()
         if current_webhook_info and current_webhook_info.url:
-            logger.info(f"Found existing webhook: {current_webhook_info.url}. Attempting to delete it.")
             try:
-                # Use drop_pending_updates=False if you want to process updates accumulated while down
-                if await ptb_app.bot.delete_webhook(drop_pending_updates=True):
-                     logger.info("Existing webhook deleted successfully.")
-                else:
-                     logger.warning("Failed to delete existing webhook (API returned False).")
-            except Exception as e:
-                logger.warning(f"Could not delete existing webhook: {e}")
-                await asyncio.sleep(1)
         space_host = os.environ.get("SPACE_HOST")
-        webhook_path = "/webhook"
-        full_webhook_url = None
         if space_host:
-            protocol = "https://" # Assume HTTPS for HF Spaces
-            host = space_host.split('://')[-1] # Get host part regardless of existing protocol
             full_webhook_url = f"{protocol}{host.rstrip('/')}{webhook_path}"
             if full_webhook_url:
-                logger.info(f"Attempting to set webhook to: {full_webhook_url}")
-                # Use secret token if configured
-                set_webhook_args = {
-                    "url": full_webhook_url,
-                    "allowed_updates": Update.ALL_TYPES,
-                    "drop_pending_updates": True,
-                }
-                if WEBHOOK_SECRET:
-                    set_webhook_args["secret_token"] = WEBHOOK_SECRET
-                    logger.info("Webhook will be set with a secret token.")
-                await asyncio.sleep(1.0) # Slightly shorter wait
                 try:
                     await ptb_app.bot.set_webhook(**set_webhook_args)
                     webhook_info = await ptb_app.bot.get_webhook_info()
-                    # Check if the URL and secret status match expectations
-                    if webhook_info.url == full_webhook_url:
-                         logger.info(f"Webhook successfully set: URL='{webhook_info.url}', Pending={webhook_info.pending_update_count}, Secret={bool(WEBHOOK_SECRET)}")
-                    else:
-                         logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got '{webhook_info.url}'")
-                         raise RuntimeError("Webhook URL mismatch after setting.")
                     await ptb_app.start()
-                    logger.info("PTB Application started (webhook mode). Ready for updates.")
-                except Exception as e:
-                    logger.error(f"FATAL: Failed to set webhook to {full_webhook_url}: {e}", exc_info=True)
-                    raise RuntimeError(f"Failed to set webhook: {e}") from e
-            else:
-                 logger.critical("Could not construct valid HTTPS webhook URL from SPACE_HOST.")
-                 raise RuntimeError("Webhook URL could not be determined.")
-        else:
-            logger.critical("SPACE_HOST environment variable not found. Cannot set webhook for HF Space.")
-            raise RuntimeError("SPACE_HOST env var missing, cannot run in webhook mode.")
         logger.info("ASGI Lifespan: Startup complete.")
-        yield # Application runs here
     except Exception as startup_err:
         logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
         if ptb_app:
              if ptb_app.running: await ptb_app.stop()
              await ptb_app.shutdown()
         raise
-    finally:
-        logger.info("ASGI Lifespan: Shutdown sequence initiated...")
         if ptb_app:
-            if ptb_app.running:
-                 logger.info("Stopping PTB application...")
-                 await ptb_app.stop()
-            logger.info("Shutting down PTB application...")
-            await ptb_app.shutdown() # This closes the main bot's request client
-            logger.info("PTB Application shut down gracefully.")
-        else:
-            logger.info("PTB application was not initialized or startup failed.")
         logger.info("ASGI Lifespan: Shutdown complete.")
-# --- Starlette Route Handlers ---
 async def health_check(request: Request) -> PlainTextResponse:
     """Basic health check endpoint."""
     bot_status = "Not Initialized"
     if ptb_app and ptb_app.bot:
         try:
-             if ptb_app.running:
-                 # Using a flag or cached info is better than get_me repeatedly
-                 bot_info = await ptb_app.bot.get_me()
-                 bot_status = f"Running (@{bot_info.username})"
-             else:
-                 bot_status = "Initialized but not running"
-        except Exception as e:
-             bot_status = f"Error checking status: {e}"
-    return PlainTextResponse(f"Telegram Bot Summarizer - Status: {bot_status}\nModel: {OPENROUTER_MODEL}\nApify Actor: {APIFY_ACTOR_NAME if _apify_token_exists else 'N/A'}")
 async def telegram_webhook(request: Request) -> Response:
     """Webhook endpoint called by Telegram."""
-    global WEBHOOK_SECRET # Access the global secret
-    if not ptb_app:
-        logger.error("Webhook received but PTB application not initialized.")
-        return PlainTextResponse('Bot not initialized', status_code=503)
-    if not ptb_app.running:
-         logger.warning("Webhook received but PTB application not running.")
-         return PlainTextResponse('Bot initialized but not running', status_code=503)
     try:
-        # Verify secret token if configured
         if WEBHOOK_SECRET:
              token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
-             if token_header != WEBHOOK_SECRET:
-                  logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'")
-                  return Response(content="Invalid secret token", status_code=403) # Forbidden
         update_data = await request.json()
         update = Update.de_json(data=update_data, bot=ptb_app.bot)
         logger.debug(f"Processing update_id: {update.update_id} via webhook")
-        # PTB's process_update runs the handlers
         await ptb_app.process_update(update)
-        # Return 200 OK quickly to Telegram
-        return Response(status_code=200)
-    except json.JSONDecodeError:
-        logger.error("Webhook received invalid JSON.")
-        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
-    except Exception as e:
-        logger.error(f"Error processing webhook update: {e}", exc_info=True)
-        # Return 200 OK to Telegram even if processing failed, to avoid retries for app logic errors
-        return Response(status_code=200)
-# --- Create Starlette ASGI Application ---
-app = Starlette(
-    debug=False,
-    lifespan=lifespan,
-    routes=[
         Route("/", endpoint=health_check, methods=["GET"]),
-        Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
-    ]
-)
 logger.info("Starlette ASGI application created with native routes.")
-# --- Development Server Execution Block (Optional) ---
 if __name__ == '__main__':
     import uvicorn
-    logger.warning("Running in development mode using Uvicorn directly (not for production)")
-    # Use LOGGING_LEVEL env var or default to info
     log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
     local_port = int(os.environ.get('PORT', 8080))
-    # Run Uvicorn with the app instance
-    uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True) # Add reload for dev

+# main.py (Refactored to match Colab logic using httpx and ASGI structure)
 import os
 import re
 import logging
 )
 from telegram.constants import ParseMode
 from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError
+from telegram.request import HTTPXRequest, BaseRequest
 # --- Other Libraries ---
+import httpx # Use httpx for all async HTTP calls
 from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 from bs4 import BeautifulSoup
 from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
 try:
     import lxml
     DEFAULT_PARSER = 'lxml'
 except ImportError:
     DEFAULT_PARSER = 'html.parser'
+# NOTE: apify-client is NOT used, as we replicate the REST API call from Colab
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO
 )
 logging.getLogger("httpx").setLevel(logging.WARNING)
+# No apify_client logger needed
 logging.getLogger("telegram.ext").setLevel(logging.INFO)
 logging.getLogger('telegram.bot').setLevel(logging.INFO)
 logging.getLogger("urllib3").setLevel(logging.INFO)
 # --- Global variable for PTB app ---
 ptb_app: Optional[Application] = None
+# --- Environment Variable Loading & Configuration ---
+logger.info("Attempting to load secrets and configuration...")
 def get_secret(secret_name):
     """Read a secret from the environment, log whether it is present, and return it.
     Args:
         secret_name: Name of the environment variable to read.
     Returns:
         The variable's value as returned by ``os.environ.get`` (None when unset).
     """
     value = os.environ.get(secret_name)
     if value:
         # Log the length only: echoing even a prefix of a secret leaks it into the logs.
         logger.info(f"Secret '{secret_name}': Found (length: {len(value)})")
     else:
         # Unset or empty: there is nothing to slice, so never index into the value here.
         logger.info(f"Secret '{secret_name}': Not Found")
     # Callers assign the result (e.g. TELEGRAM_TOKEN = get_secret(...)), so it must be returned.
     return value
 TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
 OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
+URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # For urltotext.com API
 SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
+APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')   # For Apify REST API call
+WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
+# Configuration matching Colab script
+OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
+APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo/youtube-transcripts") # Actor used in Colab
+# Check Essential Keys
+if not TELEGRAM_TOKEN:
+    logger.critical("❌ FATAL: TELEGRAM_TOKEN not found in environment variables.")
+    raise RuntimeError("Exiting: Telegram token missing.")
+if not OPENROUTER_API_KEY:
+    logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
+    # Allow running without summary capability? For now, we'll let it run but log error.
+    # raise RuntimeError("Exiting: OpenRouter key missing.")
+# Log warnings for optional keys (used in fallbacks)
+if not URLTOTEXT_API_KEY: logger.warning("⚠️ WARNING: URLTOTEXT_API_KEY not found. Fallback website scraping unavailable.")
+if not SUPADATA_API_KEY: logger.warning("⚠️ WARNING: SUPADATA_API_KEY not found. First YT transcript fallback unavailable.")
+if not APIFY_API_TOKEN: logger.warning("⚠️ WARNING: APIFY_API_TOKEN not found. Second YT transcript fallback unavailable.")
+logger.info("Secret loading and configuration check finished.")
 logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
+logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
+_apify_token_exists = bool(APIFY_API_TOKEN) # Flag for conditional logic
+# --- Retry Decorator (Unchanged) ---
+@retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15),
+        retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
+        before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
 async def retry_bot_operation(func, *args, **kwargs):
+    try: return await func(*args, **kwargs)
     except BadRequest as e:
+        ignore_errors = [ "message is not modified", "query is too old", "message to edit not found", "chat not found", "bot was blocked by the user", ]
+        if any(err in str(e).lower() for err in ignore_errors): logger.warning(f"Ignoring non-critical BadRequest: {e}"); return None
+        logger.error(f"Potentially critical BadRequest: {e}"); raise
+    except TelegramError as e: logger.warning(f"TelegramError (will retry if applicable): {e}"); raise
+    except Exception as e: logger.error(f"Unexpected error during bot operation: {e}", exc_info=True); raise
+# --- Helper Functions (Unchanged) ---
 # Single compiled pattern shared by both helpers below, so the URL test and the
 # ID extraction can never drift apart. Capture group 1 is the 11-character video ID.
 _YOUTUBE_URL_RE = re.compile(
     r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
     r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
     r'([\w-]{11})'
     r'(?:\S+)?',
     re.IGNORECASE,
 )
 def is_youtube_url(url):
     """Return True when ``url`` contains a YouTube link recognised by the shared pattern.
     Args:
         url: Candidate URL; any non-string value is treated as "not a YouTube URL".
     Returns:
         bool: Whether the pattern was found anywhere in ``url``.
     """
     # Guard the type so None or other non-strings yield False instead of a TypeError.
     match = _YOUTUBE_URL_RE.search(url) if isinstance(url, str) else None
     logger.debug(f"is_youtube_url '{url}': {bool(match)}")
     return bool(match)
 def extract_youtube_id(url):
     """Return the 11-character video ID found in ``url``, or None when there is none.
     Args:
         url: Candidate URL; any non-string value yields None.
     Returns:
         The text of capture group 1 of the shared pattern, or None if it did not match.
     """
     match = _YOUTUBE_URL_RE.search(url) if isinstance(url, str) else None
     if match:
         video_id = match.group(1)
         logger.debug(f"Extracted YT ID '{video_id}' from {url}")
         return video_id
     logger.warning(f"Could not extract YT ID from {url}")
     return None
+# --- Content Fetching Functions (Reimplemented based on Colab logic using httpx) ---
+# Generic fetcher used by website scraping (similar to previous version)
+async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
+    """Fetches HTML content from a URL using httpx for scraping."""
+    headers = { # Headers from Colab script
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1'
     }
     try:
+        async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
+            logger.debug(f"[Web Scrape] Sending request to {url}")
             response = await client.get(url)
+            logger.debug(f"[Web Scrape] Received response {response.status_code} from {url}")
             response.raise_for_status()
+            content_type = response.headers.get('content-type', '').lower()
+            if 'html' not in content_type:
+                logger.warning(f"[Web Scrape] Non-HTML content type from {url}: {content_type}")
+                return None
+            try: return response.text # Let httpx handle encoding
+            except Exception as e: logger.error(f"[Web Scrape] Error decoding response for {url}: {e}"); return None
+    except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape] HTTP error {e.response.status_code} fetching {url}: {e}")
+    except httpx.TimeoutException: logger.error(f"[Web Scrape] Timeout error fetching {url}")
+    except httpx.TooManyRedirects: logger.error(f"[Web Scrape] Too many redirects fetching {url}")
+    except httpx.RequestError as e: logger.error(f"[Web Scrape] Request error fetching {url}: {e}") # Covers ConnectError etc.
+    except Exception as e: logger.error(f"[Web Scrape] Unexpected error fetching {url}: {e}", exc_info=True)
     return None
+# --- YT Transcript Fetching ---
 async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
     """Fetch a YouTube transcript as plain text from the Supadata API.
     Sends GET /v1/youtube/transcript with ``videoId`` and ``format=text`` and
     authenticates through the ``X-API-Key`` header.
     Args:
         video_id: YouTube video ID to look up.
         api_key: Supadata API key.
     Returns:
         The stripped, non-empty transcript text, or None on missing arguments,
         any non-200 status, a network error, or a response without usable text.
     """
     if not video_id:
         logger.error("[Supadata] No video_id provided")
         return None
     if not api_key:
         logger.error("[Supadata] API key missing.")
         return None
     logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
     api_endpoint = "https://api.supadata.net/v1/youtube/transcript"
     params = {"videoId": video_id, "format": "text"}
     headers = {"X-API-Key": api_key}
     try:
         async with httpx.AsyncClient(timeout=30.0) as client:
             response = await client.get(api_endpoint, headers=headers, params=params)
             logger.debug(f"[Supadata] Status code {response.status_code} for {video_id}")
             if response.status_code == 200:
                 try:
                     try:
                         data = response.json()
                         body_is_json = True
                     except json.JSONDecodeError:
                         data = None
                         body_is_json = False
                     if isinstance(data, str):
                         content = data
                     elif isinstance(data, dict):
                         # NOTE(review): "content" is the field Supadata is believed to use; confirm against its docs.
                         content = data.get("transcript") or data.get("text") or data.get("data") or data.get("content")
                     elif not body_is_json:
                         # Use the raw body only when it is not JSON at all; a JSON document
                         # without the expected keys must not be passed off as a transcript.
                         content = response.text
                     else:
                         # Valid JSON of an unexpected shape (list, number, null, ...).
                         content = None
                     transcript = content.strip() if isinstance(content, str) else ""
                     if transcript:
                         logger.info(f"[Supadata] Success for {video_id}. Length: {len(transcript)}")
                         return transcript
                     logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}")
                     return None
                 except Exception as e:
                     logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True)
                     return None
             elif response.status_code in [401, 403]:
                 logger.error(f"[Supadata] Auth error ({response.status_code}). Check API key.")
                 return None
             elif response.status_code == 404:
                 logger.warning(f"[Supadata] Not found (404) for {video_id}.")
                 return None
             else:
                 logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}")
                 return None
     except httpx.TimeoutException:
         logger.error(f"[Supadata] Timeout connecting for {video_id}")
         return None
     except httpx.RequestError as e:
         logger.error(f"[Supadata] Request error for {video_id}: {e}")
         return None
     except Exception as e:
         logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True)
         return None
+async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
+    """Fetches YouTube transcript using Apify REST API (matching Colab actor/endpoint)."""
+    global APIFY_ACTOR_ID # Use globally configured actor
+    if not video_url: logger.error("[Apify] No video_url provided"); return None
+    if not api_token: logger.error("[Apify] API token missing."); return None
+    logger.info(f"[Apify] Attempting fetch via REST for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
+    api_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
+    params = {"token": api_token}
+    # Payload from Colab script
+    payload = {
+        "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5,
+        "channelHandleBoolean": False, "channelNameBoolean": False,
+        "datePublishedBoolean": False, "relativeDateTextBoolean": False,
+        # Add proxy config here if needed and configured via env vars
+        # "proxyOptions": { "useApifyProxy": True, "apifyProxyGroups": ["YOUR_PROXY_GROUP_IF_ANY"] },
+    }
+    headers = {"Content-Type": "application/json"}
     try:
+        async with httpx.AsyncClient(timeout=90.0) as client: # Longer timeout for sync run
+            logger.debug(f"[Apify] Sending request to run actor {APIFY_ACTOR_ID} synchronously for {video_url}")
+            response = await client.post(api_endpoint, headers=headers, params=params, json=payload) # Use json=payload with httpx
+            logger.debug(f"[Apify] Received status code {response.status_code} for {video_url}")
+            if response.status_code == 200:
+                try:
+                    results = response.json()
+                    if isinstance(results, list) and len(results) > 0:
+                        item = results[0]
+                        # Parsing logic from Colab script
+                        content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
+                        if not content and item.get("captions") and isinstance(item["captions"], list):
+                            logger.info("[Apify] Processing 'captions' format.")
+                            content = " ".join(cap.get("text", "") for cap in item["captions"] if cap.get("text"))
+                        if content and isinstance(content, str):
+                            logger.info(f"[Apify] Success via REST for {video_url}. Length: {len(content)}")
+                            return content.strip()
+                        else: logger.warning(f"[Apify] Actor success but transcript empty/not found for {video_url}. Item: {item}"); return None
+                    else: logger.warning(f"[Apify] Actor success but dataset empty for {video_url}. Response: {results}"); return None
+                except json.JSONDecodeError: logger.error(f"[Apify] Failed JSON decode for {video_url}. Status:{response.status_code}. Resp:{response.text[:200]}"); return None
+                except Exception as e: logger.error(f"[Apify] Error processing success response for {video_url}: {e}", exc_info=True); return None
+            elif response.status_code == 400: logger.error(f"[Apify] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
+            elif response.status_code == 401: logger.error("[Apify] Auth error (401). Check token."); return None # Don't retry
+            else: logger.error(f"[Apify] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
+    except httpx.TimeoutException: logger.error(f"[Apify] Timeout running actor for {video_url}"); return None
+    except httpx.RequestError as e: logger.error(f"[Apify] Request error running actor for {video_url}: {e}"); return None
+    except Exception as e: logger.error(f"[Apify] Unexpected error during Apify REST call for {video_url}: {e}", exc_info=True); return None
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
    """Fetch a YouTube transcript, trying up to three sources in order.

    Sources are attempted in this order, stopping at the first one that
    yields non-empty text:

    1. ``youtube-transcript-api`` (run in a worker thread because it blocks).
    2. Supadata API, only when ``SUPADATA_API_KEY`` is set.
    3. Apify REST API, only when ``APIFY_API_TOKEN`` is set.

    Any falsy result (``None`` or an empty string) is treated as a failure of
    that source, so an empty answer from one fallback never prevents the next
    fallback from running.

    Args:
        video_id: YouTube video ID used for the library and Supadata lookups.
        video_url: Full video URL, passed to the Apify fallback.

    Returns:
        The transcript text, or ``None`` when ``video_id`` is empty or every
        source failed.
    """
    if not video_id:
        logger.error("get_youtube_transcript: No video_id")
        return None
    logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")

    # 1. Primary: youtube-transcript-api
    logger.info("[Primary YT] Attempting youtube-transcript-api...")
    try:
        transcript_list = await asyncio.to_thread(
            YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US']
        )
        transcript_text = None
        if transcript_list:
            transcript_text = " ".join(item['text'] for item in transcript_list if 'text' in item)
        if transcript_text:
            logger.info(f"[Primary YT] Success via lib for {video_id} (len: {len(transcript_text)})")
            return transcript_text
        logger.warning(f"[Primary YT] Transcript list/text empty for {video_id}")
    except Exception as e:
        # Best effort: any library failure just moves us on to the fallbacks.
        logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}")
        if "No transcript found" in str(e):
            logger.warning("[Primary YT] No transcript found.")
        elif "disabled" in str(e):
            logger.warning("[Primary YT] Transcripts disabled.")

    # 2. Fallback 1: Supadata API
    logger.info("[Fallback YT 1] Trying Supadata API...")
    if SUPADATA_API_KEY:
        transcript_text = await get_transcript_via_supadata(video_id, SUPADATA_API_KEY)
        if transcript_text:
            logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}")
            return transcript_text
        logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
    else:
        logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")

    # 3. Fallback 2: Apify REST API
    # Reached whenever Supadata produced nothing usable (None *or* empty text).
    logger.info("[Fallback YT 2] Trying Apify REST API...")
    if APIFY_API_TOKEN:
        transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
        if transcript_text:
            logger.info(f"[Fallback YT 2] Success via Apify REST for {video_url}")
            return transcript_text
        logger.warning(f"[Fallback YT 2] Apify REST failed or no content for {video_url}.")
    else:
        logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")

    logger.error(f"All methods failed for YT transcript: {video_id}")
    return None
+# --- Website Content Fetching ---
async def get_website_content(url: str) -> Optional[str]:
    """Primary website fetch: download the page and reduce it to plain text.

    The HTML is obtained through ``fetch_url_content_for_scrape`` and parsed
    with BeautifulSoup in a worker thread. Non-content tags are stripped, the
    first matching "main content" container (falling back to ``<body>``) is
    selected, and its text is collapsed into a single space-separated string.

    Args:
        url: Address of the page to scrape.

    Returns:
        The extracted text, or ``None`` when the URL is empty, the download
        yields nothing, no container is found, the text is empty, or parsing
        raises.
    """
    if not url:
        logger.error("get_website_content: No URL")
        return None
    logger.info(f"[Primary Web] Fetching website content for: {url}")

    raw_html = await fetch_url_content_for_scrape(url)
    if not raw_html:
        return None

    # Tags that never carry article text.
    noise_tags = ["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]
    # Candidate containers, probed in priority order.
    container_lookups = (
        {"name": "main"},
        {"name": "article"},
        {"id": "content"},
        {"class_": "content"},
        {"id": "main-content"},
        {"class_": "main-content"},
        {"role": "main"},
    )

    def extract_text(markup):
        # DEFAULT_PARSER is lxml when installed, html.parser otherwise.
        soup = BeautifulSoup(markup, DEFAULT_PARSER)
        for node in soup(noise_tags):
            node.extract()

        container = None
        for lookup in container_lookups:
            container = soup.find(**lookup)
            if container:
                break
        root = container if container else soup.body
        if not root:
            logger.warning(f"[Primary Web] Could not find body/main for parsing {url}")
            return None

        stripped = (chunk.strip() for chunk in root.get_text(separator='\n', strip=True).splitlines())
        joined = " ".join(piece for piece in stripped if piece)
        if not joined:
            logger.warning(f"[Primary Web] Extracted text empty after clean for {url}")
            return None
        return joined

    try:
        page_text = await asyncio.to_thread(extract_text, raw_html)
        if not page_text:
            return None
        logger.info(f"[Primary Web] Success scrape for {url} (final len: {len(page_text)})")
        return page_text
    except Exception as e:
        logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True)
        return None
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
    """Fallback website fetch through the urltotext.com API.

    Posts the URL to the API (JavaScript rendering and main-content
    extraction enabled, residential proxy disabled) and returns the
    ``data.content`` field of a 200 response, stripped.

    Args:
        url: Address of the page to fetch.
        api_key: urltotext.com token, sent as ``Authorization: Token <key>``.

    Returns:
        The stripped page text, or ``None`` on missing arguments, a non-200
        status, empty content, a JSON/processing error, timeout, or request
        error.
    """
    if not url:
        logger.error("[Fallback Web API] No URL")
        return None
    if not api_key:
        logger.error("[Fallback Web API] urltotext.com API key missing.")
        return None
    logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")

    api_endpoint = "https://urltotext.com/api/v1/urltotext/"
    request_body = {
        "url": url,
        "output_format": "text",
        "extract_main_content": True,
        "render_javascript": True,
        "residential_proxy": False,
    }
    request_headers = {
        "Authorization": f"Token {api_key}",
        "Content-Type": "application/json",
    }
    documented_error_codes = [400, 401, 402, 403, 422, 500]

    try:
        # Generous timeout: the API renders JavaScript before answering.
        async with httpx.AsyncClient(timeout=45.0) as client:
            logger.debug(f"[Fallback Web API] Sending request to urltotext.com API for {url}")
            response = await client.post(api_endpoint, headers=request_headers, json=request_body)
            logger.debug(f"[Fallback Web API] Received status {response.status_code} from urltotext.com API for {url}")

            if response.status_code != 200:
                if response.status_code in documented_error_codes:
                    logger.error(f"[Fallback Web API] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
                else:
                    logger.error(f"[Fallback Web API] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
                return None

            try:
                data = response.json()
                content = data.get("data", {}).get("content")
                credits = data.get("credits_used", "N/A")
                warning = data.get("data", {}).get("warning")
                if warning:
                    logger.warning(f"[Fallback Web API] urltotext.com API Warning for {url}: {warning}")
                if not content:
                    logger.warning(f"[Fallback Web API] urltotext.com API success but content empty for {url}. Resp: {data}")
                    return None
                logger.info(f"[Fallback Web API] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}")
                return content.strip()
            except json.JSONDecodeError:
                logger.error(f"[Fallback Web API] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}")
                return None
            except Exception as e:
                logger.error(f"[Fallback Web API] Error processing urltotext.com success response for {url}: {e}", exc_info=True)
                return None
    except httpx.TimeoutException:
        logger.error(f"[Fallback Web API] Timeout connecting to urltotext.com API for {url}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True)
        return None
+# --- Summarization Function (Using DeepSeek via OpenRouter - Colab logic) ---
async def generate_summary(text: str, summary_type: str) -> str:
    """Summarise ``text`` through the OpenRouter chat-completions endpoint.

    Uses the module-level ``OPENROUTER_API_KEY`` and ``OPENROUTER_MODEL``.
    Input longer than 500,000 characters is truncated before sending.

    Args:
        text: Content to summarise.
        summary_type: ``"paragraph"`` selects the single-paragraph prompt;
            any other value selects the headed bullet-point prompt.

    Returns:
        The stripped summary on success. On any failure a user-facing message
        starting with ``"Error:"`` or ``"Sorry,"`` is returned instead of
        raising.
    """
    logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
    if not OPENROUTER_API_KEY:
        logger.error("OpenRouter key missing for generate_summary.")
        return "Error: AI model configuration key missing."

    paragraph_prompt = (
        "You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
        "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
        "• Uses British English spellings throughout.\n"
        "• Straightforward and understandable vocabulary; avoid complex terms.\n"
        "• Presented as ONE SINGLE PARAGRAPH.\n"
        "• No more than 85 words maximum; but does not have to be exactly 85.\n"
        "• Considers the entire text content equally.\n"
        "• Uses semicolons (;) instead of em dashes (– or —).\n\n"
        "Here is the text to summarise:"
    )
    points_prompt = (
        "You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n"
        "• For each distinct topic or section identified in the text, create a heading.\n"
        "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n"
        "• Immediately following each heading, list the key points as a bulleted list.\n"
        "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n"
        "• The text within each bullet point should NOT contain any bold formatting.\n"
        "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n"
        "• Use British English spellings throughout.\n"
        "• Avoid overly complex or advanced vocabulary.\n"
        "• Keep bullet points concise.\n"
        "• Ensure the entire summary takes no more than two minutes to read.\n"
        "• Consider the entire text's content, not just the beginning or a few topics.\n"
        "• Use semicolons (;) instead of em dashes (– or —).\n\n"
        "Here is the text to summarise:"
    )
    prompt = paragraph_prompt if summary_type == "paragraph" else points_prompt

    # Cap the request size; the tail is dropped and marked as truncated.
    MAX_INPUT_LENGTH = 500000
    if len(text) > MAX_INPUT_LENGTH:
        logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating.")
        text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"

    full_prompt = f"{prompt}\n\n{text}"
    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": OPENROUTER_MODEL,
        "messages": [{"role": "user", "content": full_prompt}],
    }
    openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            logger.debug(f"Sending request to OpenRouter ({OPENROUTER_MODEL})...")
            response = await client.post(openrouter_api_endpoint, headers=request_headers, json=request_body)
            logger.debug(f"Received status {response.status_code} from OpenRouter.")
            status = response.status_code

            if status == 401:
                logger.error("OpenRouter API key invalid (401).")
                return "Error: AI model configuration key is invalid."
            if status == 402:
                logger.error("OpenRouter Payment Required (402).")
                return "Sorry, AI service limits/payment issue."
            if status == 429:
                logger.warning("OpenRouter Rate Limit Exceeded (429).")
                return "Sorry, AI model is busy. Try again."
            if status == 500:
                logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}")
                return "Sorry, AI service internal error."
            if status != 200:
                logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp:{response.text[:500]}")
                return f"Sorry, AI service returned unexpected status ({response.status_code})."

            # Status 200: walk choices[0].message.content, validating each level.
            try:
                data = response.json()
                choices = data.get("choices")
                if not (choices and isinstance(choices, list) and len(choices) > 0):
                    logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}")
                    return "Sorry, could not parse AI response (choices)."
                message = choices[0].get("message")
                if not (message and isinstance(message, dict)):
                    logger.error(f"Unexpected message structure in OpenRouter resp: {message}. Full: {data}")
                    return "Sorry, could not parse AI response (format)."
                summary = message.get("content")
                if not summary:
                    logger.warning(f"OpenRouter success but content empty. Resp: {data}")
                    return "Sorry, the AI model returned an empty summary."
                logger.info(f"Success generating summary via OpenRouter. Output len: {len(summary)}")
                return summary.strip()
            except json.JSONDecodeError:
                logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}")
                return "Sorry, failed to understand AI response."
            except Exception as e:
                logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True)
                return "Sorry, error processing AI response."
    except httpx.TimeoutException:
        logger.error("Timeout connecting to OpenRouter API.")
        return "Sorry, request to AI model timed out."
    except httpx.RequestError as e:
        logger.error(f"Request error connecting to OpenRouter API: {e}")
        return "Sorry, error connecting to AI service."
    except Exception as e:
        logger.error(f"Unexpected error in generate_summary (OpenRouter): {e}", exc_info=True)
        return "Sorry, unexpected error generating summary."
+# --- Background Task Processing (Orchestrates new fetch/summary functions) ---
 async def process_summary_task(
+    user_id: int, chat_id: int, message_id_to_edit: Optional[int],
+    url: str, summary_type: str, bot_token: str # bot_token needed to create instance
 ) -> None:
     task_id = f"{user_id}-{message_id_to_edit or 'new'}"
     logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
     background_request: Optional[BaseRequest] = None
     bot: Optional[Bot] = None
+    try: # Create background bot instance
+        background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 )
         bot = Bot(token=bot_token, request=background_request)
+    except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
+    content = None; user_feedback_message = None; success = False
     status_message_id = message_id_to_edit
+    message_to_delete_later_id : Optional[int] = None # Track ID of new status message
     try:
+        # --- Inform User Processing Started ---
+        processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
         if status_message_id:
             try:
+                await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id,
+                                           text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None )
+                logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
             except Exception as e:
+                logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Sending new.")
+                status_message_id = None # Will trigger sending new message
+        if not status_message_id: # Send new status message if needed
              try:
+                 status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN )
+                 if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
+                 else: raise RuntimeError("Failed to send status message after retries.")
+             except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise # Stop if we can't inform user
+        # --- Main Fetching & Summarization ---
         try:
             await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
+            is_youtube = is_youtube_url(url)
+            logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
+            if is_youtube:
                 video_id = extract_youtube_id(url)
+                if video_id: content = await get_youtube_transcript(video_id, url) # Calls new func with fallbacks
+                else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
+                if not content and not user_feedback_message: # Set default fail message if get_youtube_transcript returned None
+                     user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
+            else: # Website
+                content = await get_website_content(url) # Calls new primary func
+                if not content: # Try fallback
+                    logger.warning(f"[Task {task_id}] Primary web scrape failed for {url}. Trying fallback API.")
+                    global URLTOTEXT_API_KEY # Access key
+                    if URLTOTEXT_API_KEY:
+                         await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
+                         content = await get_website_content_via_api(url, URLTOTEXT_API_KEY) # Calls new fallback func
+                         if not content: user_feedback_message = "Sorry, I couldn't fetch content from that website using either method (blocked/inaccessible/empty?)."
+                    else:
+                         user_feedback_message = "Sorry, I couldn't fetch content from that website (blocked/inaccessible/empty?). The fallback method is not configured."
+            # --- Generate Summary ---
             if content:
+                logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
                 await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
+                # Use new generate_summary function (keys accessed globally within it)
+                final_summary = await generate_summary(content, summary_type)
                 if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
+                    user_feedback_message = final_summary # Use error from summary func
+                    logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
+                else:
+                    # Success! Send summary (split if needed)
+                    max_length = 4096
+                    summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
+                    # Send first/only part
+                    await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0],
+                                               parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
+                    # Send subsequent parts
+                    for part in summary_parts[1:]: await asyncio.sleep(0.5); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
+                    success = True; logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
+                    user_feedback_message = None # Clear any previous error
+            # --- Send Feedback if Fetching or Summary Failed ---
+            elif user_feedback_message: # Only send if content failed AND message exists
+                logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}")
+                await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
         except Exception as e:
+            logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
+            user_feedback_message = "Oops! Something went really wrong while processing your request. Please try again later."
+            # Ensure we send this feedback if an unexpected exception occurs
+            try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
+            except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
+    except Exception as outer_e: # Catch critical errors like failing to send status message
+        logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
         try:
+             if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred." )
+        except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
     finally:
+        # --- Cleanup ---
+        delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
+        if delete_target_id and bot:
             try:
+                # Delete the original button message OR the status message we sent
+                await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id)
+                logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
+            except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
+        # Close background bot's httpx client
         if background_request and hasattr(background_request, '_client') and background_request._client:
+             try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
+             except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
         logger.info(f"[Task {task_id}] Task completed. Success: {success}")
+# --- Telegram Bot Handlers (Unchanged structure, Colab text/logic adjusted) ---
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+    user = update.effective_user; mention = user.mention_html()
     if not user or not update.message: return
+    logger.info(f"User {user.id} used /start.")
+    await update.message.reply_html( f"👋 Hello {mention}! I can summarize YouTube links or website URLs.\n\nJust send me a link anytime!" )
 async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     user = update.effective_user
     if not user or not update.message: return
+    logger.info(f"User {user.id} used /help.")
+    # Help text from Colab script
+    help_text = ( "🔍 How to use this bot:\n\n"
+                  "1. Send me any YouTube video link or website URL.\n"
+                  "2. I'll ask you how you want it summarized (paragraph or points).\n"
+                  "3. Click the button for your choice.\n"
+                  "4. Wait for the summary!\n\n"
+                  "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n"
+                  "Commands:\n"
+                  "`/start` - Display welcome message\n"
+                  "`/help` - Show this help message" )
     await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
 async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     if not update.message or not update.message.text: return
+    url = update.message.text.strip(); user = update.effective_user
     if not user: return
+    # Basic validation from Colab script
+    if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
+        logger.debug(f"Ignoring non-URL from {user.id}: {url}"); return
+    logger.info(f"User {user.id} sent potential URL: {url}")
+    context.user_data['url_to_summarize'] = url
+    context.user_data['original_message_id'] = update.message.message_id # Still useful potentially
+    # Keyboard text from Colab script
+    keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
+    reply_markup = InlineKeyboardMarkup(keyboard)
+    # Reply text from Colab script
+    await update.message.reply_text( f"Okay, I see this link:\n{url}\n\nHow would you like it summarized?",
+        reply_markup=reply_markup, disable_web_page_preview=True )
 async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+    """Handles button press, retrieves URL, and schedules background task."""
     query = update.callback_query
+    if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
+    user = query.from_user; summary_type = query.data; query_id = query.id
+    try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
+    except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
     url = context.user_data.get('url_to_summarize')
+    message_id_to_edit = query.message.message_id # Use the message with the buttons
+    logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
     if not url:
+        logger.warning(f"No URL in context for user {user.id} (cb {query_id}).")
+        try: await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
+        except Exception as e: logger.error(f"Failed edit 'URL not found' msg: {e}"); try: await context.bot.send_message(chat_id=user.id, text="Sorry, context lost. Send link again.") except Exception: pass
         return
+    # Clear context *before* starting task to prevent race conditions if user clicks fast
+    context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None)
+    logger.debug(f"Cleared URL context for user {user.id}")
+    # Check essential keys needed for the task *before* scheduling
+    global TELEGRAM_TOKEN, OPENROUTER_API_KEY
+    if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing!"); try: await query.edit_message_text(text="❌ Bot config error.") except Exception: pass; return
+    if not OPENROUTER_API_KEY: logger.error("OpenRouter key missing!"); try: await query.edit_message_text(text="❌ AI config error.") except Exception: pass; return
+    logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
+    # Pass the bot token to the background task so it can create its own instance
+    asyncio.create_task( process_summary_task( user_id=user.id, chat_id=query.message.chat_id, message_id_to_edit=message_id_to_edit,
+            url=url, summary_type=summary_type, bot_token=TELEGRAM_TOKEN ), name=f"SummaryTask-{user.id}-{message_id_to_edit}" )
+# --- Error Handler, Bot Setup, Lifespan, Routes (Largely Unchanged, Ensure Keys Read) ---
 async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
     """Log Errors caused by Updates."""
+    # Add specific error types to ignore if they are handled elsewhere or benign
+    ignore_errors = (AttributeError, ) # Example: Ignore cleanup errors if handled in finally blocks
+    if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): # Be more specific
+         logger.debug(f"Ignoring known/handled error in error_handler: {context.error}")
          return
     logger.error("Exception while handling an update:", exc_info=context.error)
 async def setup_bot_config() -> Application:
     """Configures the PTB Application."""
     logger.info("Configuring Telegram Application...")
+    global TELEGRAM_TOKEN # Ensure global token is accessible
+    if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
+    custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
+    application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
     application.add_handler(CommandHandler("start", start))
     application.add_handler(CommandHandler("help", help_command))
     application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
     application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
     application.add_error_handler(error_handler)
     logger.info("Telegram application handlers configured.")
     return application
 @contextlib.asynccontextmanager
 async def lifespan(app: Starlette):
     """Handles PTB startup and shutdown during ASGI lifespan."""
+    global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN # Access globals
+    logger.info("ASGI Lifespan: Startup initiated...")
+    # Essential key check already happened globally, but double-check token
+    if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
     try:
         ptb_app = await setup_bot_config()
         await ptb_app.initialize()
         bot_info = await ptb_app.bot.get_me()
+        logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
+        # --- Webhook setup (unchanged from previous version) ---
         current_webhook_info = await ptb_app.bot.get_webhook_info()
         if current_webhook_info and current_webhook_info.url:
+            logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
             try:
+                if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
+                else: logger.warning("Failed delete webhook (API returned False).")
+            except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
         space_host = os.environ.get("SPACE_HOST")
+        webhook_path = "/webhook"; full_webhook_url = None
         if space_host:
+            protocol = "https://"; host = space_host.split('://')[-1]
             full_webhook_url = f"{protocol}{host.rstrip('/')}{webhook_path}"
             if full_webhook_url:
+                logger.info(f"Setting webhook: {full_webhook_url}")
+                set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
+                if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
+                await asyncio.sleep(1.0)
                 try:
                     await ptb_app.bot.set_webhook(**set_webhook_args)
                     webhook_info = await ptb_app.bot.get_webhook_info()
+                    if webhook_info.url == full_webhook_url: logger.info(f"Webhook set: URL='{webhook_info.url}', Secret={bool(WEBHOOK_SECRET)}")
+                    else: logger.error(f"Webhook URL mismatch! Expected '{full_webhook_url}', Got '{webhook_info.url}'"); raise RuntimeError("Webhook URL mismatch.")
                     await ptb_app.start()
+                    logger.info("PTB Application started (webhook mode).")
+                except Exception as e: logger.error(f"FATAL: Failed set webhook: {e}", exc_info=True); raise RuntimeError(f"Failed set webhook: {e}") from e
+            else: logger.critical("Could not construct webhook URL."); raise RuntimeError("Webhook URL undetermined.")
+        else: logger.critical("SPACE_HOST missing."); raise RuntimeError("SPACE_HOST env var missing.")
+        # --- End Webhook Setup ---
         logger.info("ASGI Lifespan: Startup complete.")
+        yield # App runs
     except Exception as startup_err:
         logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
         if ptb_app:
              if ptb_app.running: await ptb_app.stop()
              await ptb_app.shutdown()
         raise
+    finally: # Shutdown
+        logger.info("ASGI Lifespan: Shutdown initiated...")
         if ptb_app:
+            if ptb_app.running: logger.info("Stopping PTB..."); await ptb_app.stop()
+            logger.info("Shutting down PTB..."); await ptb_app.shutdown()
+            logger.info("PTB Application shut down.")
+        else: logger.info("PTB application not initialized or failed.")
         logger.info("ASGI Lifespan: Shutdown complete.")
 async def health_check(request: Request) -> PlainTextResponse:
     """Basic health check endpoint.

     Reports the state of the PTB application together with the configured
     OpenRouter model and Apify actor as a plain-text body.

     Args:
         request: The incoming request; it is not inspected.

     Returns:
         A ``PlainTextResponse`` with three lines: bot status, model name,
         and the Apify actor id (or a placeholder when no Apify token is set).
     """
     # Module-level settings are only read here, so no ``global`` is required.
     status_text = "Not Initialized"
     if ptb_app and ptb_app.bot:
         try:
             if not ptb_app.running:
                 status_text = "Initialized/Not running"
             else:
                 # get_me() supplies the username shown in the status line.
                 me = await ptb_app.bot.get_me()
                 status_text = f"Running (@{me.username})"
         except Exception as e:
             # Any failure while probing is reported in the body, not raised.
             status_text = f"Error checking status: {e}"
     report_lines = [
         f"TG Bot Summarizer - Status: {status_text}",
         f"Model: {OPENROUTER_MODEL}",
         f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}",
     ]
     return PlainTextResponse("\n".join(report_lines))
 async def telegram_webhook(request: Request) -> Response:
     """Webhook endpoint called by Telegram.

     Checks the optional secret-token header, parses the JSON body into an
     ``Update`` and hands it to the PTB application for processing.

     Args:
         request: The incoming POST request carrying the update as JSON.

     Returns:
         * 503 when the PTB application is missing or not running.
         * 403 when ``WEBHOOK_SECRET`` is set and the
           ``X-Telegram-Bot-Api-Secret-Token`` header is absent or different.
         * 400 when the body is not valid JSON.
         * 200 otherwise, including when processing the update raises
           (the error is logged and swallowed).
     """
     # Function-scope import keeps this block self-contained.
     import hmac
     if not ptb_app:
         logger.error("Webhook recv but PTB not initialized.")
         return PlainTextResponse('Bot not initialized', status_code=503)
     if not ptb_app.running:
         logger.warning("Webhook recv but PTB not running.")
         return PlainTextResponse('Bot not running', status_code=503)
     try:
         if WEBHOOK_SECRET:
             token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
             # Constant-time comparison so response timing does not reveal how
             # much of a guessed token matched. Comparing bytes avoids the
             # TypeError compare_digest raises for non-ASCII str arguments.
             # Assumes WEBHOOK_SECRET is a str - TODO confirm where it is set.
             secret_ok = token_header is not None and hmac.compare_digest(
                 token_header.encode("utf-8"), WEBHOOK_SECRET.encode("utf-8")
             )
             if not secret_ok:
                 # Never echo the supplied value: it is attacker-controlled and
                 # may be a near-miss of the real secret.
                 header_state = "present" if token_header is not None else "missing"
                 logger.warning(f"Webhook invalid secret. Header {header_state}.")
                 return Response(content="Invalid secret token", status_code=403)
         update_data = await request.json()
         update = Update.de_json(data=update_data, bot=ptb_app.bot)
         logger.debug(f"Processing update_id: {update.update_id} via webhook")
         await ptb_app.process_update(update)
         return Response(status_code=200)  # OK
     except json.JSONDecodeError:
         logger.error("Webhook invalid JSON.")
         return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
     except Exception as e:
         logger.error(f"Error processing webhook update: {e}", exc_info=True)
         # Deliberately still 200 - presumably so Telegram does not keep
         # redelivering an update that fails to process.
         return Response(status_code=200)
+# --- Create Starlette ASGI Application (Unchanged) ---
+app = Starlette( debug=False, lifespan=lifespan, routes=[
         Route("/", endpoint=health_check, methods=["GET"]),
+        Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
 logger.info("Starlette ASGI application created with native routes.")
+# --- Development Server Block (Unchanged) ---
 if __name__ == '__main__':
     import uvicorn
+    logger.warning("Running in development mode using Uvicorn directly")
     log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
     local_port = int(os.environ.get('PORT', 8080))
+    uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)