Update main.py
main.py (CHANGED)
@@ -53,12 +53,13 @@ except ImportError:
 # --- Google Gemini ---
 try:
     import google.generativeai as genai
-    from google.generativeai.types import HarmCategory, HarmBlockThreshold
+    from google.generativeai.types import HarmCategory, HarmBlockThreshold, GenerateContentResponse
     _gemini_available = True
 except ImportError:
     genai = None
     HarmCategory = None
     HarmBlockThreshold = None
+    GenerateContentResponse = None # Add this for type hinting if needed
     _gemini_available = False
     # logger defined later
 
@@ -111,7 +112,8 @@ GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Primary Summarizer
 # Models (User can still configure via env vars)
 OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
 APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
-GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001")
+GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash-latest") # Use the 1.5 flash model directly
+# Using gemini-1.5-flash-latest is generally recommended over gemini-2.0-flash-001
 
 # --- Configuration Checks ---
 if not TELEGRAM_TOKEN: logger.critical("β FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
@@ -155,6 +157,10 @@ if _gemini_primary_enabled:
         logger.error(f"Failed to configure Google GenAI client: {e}")
         _gemini_primary_enabled = False
 
+# --- Constants ---
+MAX_SUMMARY_CHUNK_SIZE = 4000 # Max characters per Telegram message (allow buffer)
+MAX_INPUT_TOKEN_APPROX = 1000000 # Gemini 1.5 Flash context window (approx chars) - adjust if needed
+
 # --- Retry Decorator ---
 # (Remains the same)
 @retry(
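(Context for the constants introduced above: the 4000-character chunk size sits just under Telegram's 4096-character message cap, which the previous revision hard-coded as max_length; a tiny sketch of that headroom, with the 4096 figure taken from that earlier constant rather than re-checked here.)

TELEGRAM_MESSAGE_LIMIT = 4096   # the old max_length constant from the previous revision
CHUNK_HEADROOM = TELEGRAM_MESSAGE_LIMIT - MAX_SUMMARY_CHUNK_SIZE   # = 96 characters spare per part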
@@ -333,12 +339,37 @@ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
         return None
 
     logger.info(f"[Crawl4AI Primary] Attempting to crawl URL: {url}")
+    # Define a writable cache directory (use /tmp in container environments)
+    # Create the directory path beforehand to avoid potential race conditions or permission issues within the library
+    cache_dir_path = "/tmp/.crawl4ai" # CHANGED: Use /tmp
+    try:
+        os.makedirs(cache_dir_path, exist_ok=True)
+        logger.info(f"[Crawl4AI Primary] Ensured cache directory exists: {cache_dir_path}")
+    except OSError as e:
+        logger.error(f"[Crawl4AI Primary] Failed to create cache directory {cache_dir_path}: {e}. Crawl may fail.")
+        # Don't return here, let the crawler try anyway, it might handle it internally or use default
+    except Exception as e:
+        logger.error(f"[Crawl4AI Primary] Unexpected error creating cache directory {cache_dir_path}: {e}")
+
+
     try:
-        # Use AsyncWebCrawler context manager
-        async with AsyncWebCrawler() as crawler:
+        # Use AsyncWebCrawler context manager with explicit cache_dir
+        # NOTE: Pass cache_dir here if the library supports it via __init__ or a config object
+        # Checking crawl4ai docs/source, AsyncWebCrawler doesn't directly take cache_dir in __init__.
+        # It seems to rely on environment variables or default home resolution.
+        # The PermissionError happens in RobotsParser -> get_home_folder -> os.makedirs.
+        # WORKAROUND: We might need to adjust the environment or hope setting HOME=/app in Dockerfile is enough
+        # *if* the library correctly uses HOME. Let's test *without* explicit cache_dir first,
+        # relying on HOME=/app and the prior os.makedirs call. If it still fails, we need a different approach.
+
+        # UPDATE: The traceback shows it uses utils.get_home_folder(). Let's stick with HOME=/app for now
+        # and see if the permission error was transient or specific to the '.models' subdir.
+        # If it persists, we might need to fork/modify crawl4ai or find another way to configure its paths.
+
+        # Let's *try* passing cache_dir anyway, maybe it's an undocumented/newer feature
+        async with AsyncWebCrawler(cache_dir=cache_dir_path) as crawler: # TRY passing cache_dir
+            logger.info(f"[Crawl4AI Primary] Initialized with explicit cache_dir: {cache_dir_path}")
             # Use arun for a single URL crawl
-            # We primarily want the Markdown output as it's designed for LLMs
-            # Add a reasonable timeout
             result = await crawler.arun(url=url, crawler_strategy="playwright", timeout=90) # 90 sec timeout
 
             if result and result.markdown:
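(Aside on the cache-directory comments above: a minimal, hedged sketch of the environment-based workaround they describe, i.e. pointing HOME at a writable location and pre-creating the cache folder before crawl4ai is imported. That crawl4ai resolves its folders from HOME is an assumption taken from the traceback discussed in those comments, not something this diff verifies.)

import os

# Assumption: crawl4ai derives its cache/model folders from the home directory,
# so HOME must point somewhere writable inside the container (e.g. /tmp).
os.environ.setdefault("HOME", "/tmp")
os.makedirs("/tmp/.crawl4ai", exist_ok=True)

# Import only after the environment is prepared, so import-time path resolution
# already sees the writable HOME.
from crawl4ai import AsyncWebCrawler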
@@ -363,9 +394,15 @@ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
     except asyncio.TimeoutError:
         logger.error(f"[Crawl4AI Primary] Timeout occurred while crawling {url}")
         return None
+    except PermissionError as e: # Catch the specific error
+        logger.error(f"[Crawl4AI Primary] Permission denied during crawl for {url}. Likely filesystem issue in container. Error: {e}", exc_info=True)
+        return None # Fail gracefully for this method
     except Exception as e:
+        # Log type error if cache_dir isn't accepted
+        if "unexpected keyword argument 'cache_dir'" in str(e):
+            logger.error(f"[Crawl4AI Primary] AsyncWebCrawler does not accept 'cache_dir'. Remove this argument. Error: {e}")
+        else:
+            logger.error(f"[Crawl4AI Primary] Unexpected error during crawl for {url}: {e}", exc_info=True)
         return None
 
 
@@ -381,8 +418,8 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
             response.raise_for_status()
             content_type = response.headers.get('content-type', '').lower()
             if 'html' not in content_type: logger.warning(f"[Web Scrape BS4] Non-HTML content type from {url}: {content_type}"); return None
-            try: return response.text
-            except Exception as e: logger.error(f"[Web Scrape BS4] Error
+            try: return response.text # Use response.text to let httpx handle decoding
+            except Exception as e: logger.error(f"[Web Scrape BS4] Error getting response text for {url}: {e}"); return None
     except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape BS4] HTTP error {e.response.status_code} fetching {url}: {e}")
     except httpx.TimeoutException: logger.error(f"[Web Scrape BS4] Timeout error fetching {url}")
     except httpx.TooManyRedirects: logger.error(f"[Web Scrape BS4] Too many redirects fetching {url}")
@@ -392,6 +429,7 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
 
 async def get_website_content_bs4(url: str) -> Optional[str]:
     """Fetches and parses website content using BeautifulSoup (Fallback 1)."""
+    # ... (Keep existing implementation) ...
     if not url: logger.error("[BS4 Fallback] get_website_content_bs4: No URL"); return None
     logger.info(f"[BS4 Fallback] Attempting basic fetch & parse for: {url}")
     html_content = await fetch_url_content_for_scrape(url)
@@ -404,20 +442,26 @@ async def get_website_content_bs4(url: str) -> Optional[str]:
     def parse_html(content):
         soup = BeautifulSoup(content, DEFAULT_PARSER)
         # Remove common non-content elements
-        for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]):
+        for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio"]):
             element.extract()
-        # Try to find main content areas
-        target_element =
+        # Try to find main content areas more broadly
+        selectors = ['main', 'article', '[role="main"]', '#content', '.content', '#main-content', '.main-content', '#body', '.body', '#article-body', '.article-body']
+        target_element = None
+        for selector in selectors:
+            target_element = soup.select_one(selector)
+            if target_element: break
+
+        if not target_element: target_element = soup.body # Fallback to body
+        if not target_element: logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}"); return None
+
+        # Extract text, clean up whitespace aggressively
         lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
-        text = " ".join(lines)
+        text = " ".join(lines) # Join lines with spaces
+
+        # Basic post-cleaning
+        text = re.sub(r'\s{2,}', ' ', text).strip() # Replace multiple spaces with single space
+
+        if not text: logger.warning(f"[BS4 Fallback] Extracted text is empty after cleaning for {url}"); return None
         return text
 
     text_content = await asyncio.to_thread(parse_html, html_content)
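(For reference, a minimal way to exercise the BeautifulSoup fallback above outside the bot; it assumes main.py is importable as a module with its logging and DEFAULT_PARSER setup already done, and the URL is only an example.)

import asyncio
from main import get_website_content_bs4   # hypothetical import path for local testing

text = asyncio.run(get_website_content_bs4("https://example.com"))
print((text or "<no content>")[:300])   # show the first few hundred characters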
@@ -434,7 +478,7 @@ async def get_website_content_bs4(url: str) -> Optional[str]:
 # Fallback 2: urltotext.com API
 async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
     """Fetches website content using urltotext.com API (Fallback 2)."""
-    # ... (Keep existing implementation
+    # ... (Keep existing implementation) ...
     if not url: logger.error("[API Fallback] No URL"); return None
     if not api_key: logger.error("[API Fallback] urltotext.com API key missing."); return None
     logger.info(f"[API Fallback] Attempting fetch for: {url} using urltotext.com API")
@@ -455,83 +499,286 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
                 else: logger.warning(f"[API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
             except json.JSONDecodeError: logger.error(f"[API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
             except Exception as e: logger.error(f"[API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
-        elif response.status_code
+        elif response.status_code == 402: # Specifically handle insufficient credits
+            logger.error(f"[API Fallback] Error 402 (Insufficient Credits) from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
+        elif response.status_code in [400, 401, 403, 422, 500]: logger.error(f"[API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
         else: logger.error(f"[API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
     except httpx.TimeoutException: logger.error(f"[API Fallback] Timeout connecting to urltotext.com API for {url}"); return None
     except httpx.RequestError as e: logger.error(f"[API Fallback] Request error connecting to urltotext.com API for {url}: {e}"); return None
     except Exception as e: logger.error(f"[API Fallback] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
 
 # --- Summarization Functions ---
-# (_call_gemini, _call_openrouter, generate_summary remain the same)
 async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
+    """ Calls the Google Gemini API to generate a summary. """
     global GEMINI_MODEL, _gemini_primary_enabled
     if not _gemini_primary_enabled:
         logger.error("[Gemini Primary] Called but is disabled.");
         return None, "Error: Primary AI service (Gemini) not configured/available."
+
+    # Truncate input text if it exceeds the approximate limit
+    if len(text) > MAX_INPUT_TOKEN_APPROX:
+        logger.warning(f"[Gemini Primary] Input text length ({len(text)}) exceeds limit ({MAX_INPUT_TOKEN_APPROX}). Truncating.")
+        text = text[:MAX_INPUT_TOKEN_APPROX]
+
     logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
-    # Define prompts (Keep existing prompts)
-    if summary_type == "paragraph": prompt = ("...") # Your existing paragraph prompt
-    else: prompt = ("...") # Your existing points prompt
-    # ... (rest of the Gemini call logic remains the same) ...
-    # Including length check, safety settings, API call, response handling
 
+    # Define prompts
+    if summary_type == "paragraph":
+        prompt = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information. Avoid unnecessary jargon or overly complex sentences.
+
+Text to summarise:
+---
+{text}
+---
+
+Concise Paragraph Summary:"""
+    elif summary_type == "points":
+        prompt = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea or important piece of information. Aim for clarity and conciseness.
+
+Text to summarise:
+---
+{text}
+---
+
+Key Bullet Points Summary:"""
+    else:
+        logger.error(f"[Gemini Primary] Invalid summary_type: {summary_type}")
+        return None, f"Error: Invalid summary type '{summary_type}' specified."
+
+    # Configure safety settings (adjust as needed)
+    safety_settings = {
+        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+    }
+
+    # Configure generation settings (optional)
+    generation_config = genai.types.GenerationConfig(
+        # candidate_count=1, # Default is 1
+        # stop_sequences=["\n"],
+        max_output_tokens=2048, # Increased max tokens for potentially longer summaries from large inputs
+        temperature=0.7, # Adjust creativity vs factualness
+        # top_p=1.0, # Default
+        # top_k=None # Default
+    )
+
+    try:
+        model = genai.GenerativeModel(GEMINI_MODEL)
+        logger.debug(f"[Gemini Primary] Sending request to model {GEMINI_MODEL}")
+        response: GenerateContentResponse = await model.generate_content_async( # Use async version
+            prompt,
+            generation_config=generation_config,
+            safety_settings=safety_settings
+        )
+        logger.debug(f"[Gemini Primary] Received response. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}")
+
+        # Check for safety blocks or other issues in response
+        if not response.candidates:
+            block_reason = response.prompt_feedback.block_reason if hasattr(response, 'prompt_feedback') else 'Unknown'
+            error_msg = f"Error: Gemini response blocked or empty. Reason: {block_reason}"
+            logger.error(f"[Gemini Primary] {error_msg}")
+            return None, error_msg
+
+        # Check finish reason (e.g., MAX_TOKENS, SAFETY)
+        finish_reason = response.candidates[0].finish_reason
+        if finish_reason != genai.types.FinishReason.STOP and finish_reason != genai.types.FinishReason.MAX_TOKENS:
+            # Log safety ratings if available
+            safety_ratings_str = "N/A"
+            if hasattr(response.candidates[0], 'safety_ratings'):
+                safety_ratings_str = ', '.join([f"{r.category.name}: {r.probability.name}" for r in response.candidates[0].safety_ratings])
+            error_msg = f"Error: Gemini generation finished unexpectedly. Reason: {finish_reason.name}. Safety: {safety_ratings_str}"
+            logger.error(f"[Gemini Primary] {error_msg}")
+            # Return partial text if available and finish reason is MAX_TOKENS? Maybe not, could be truncated badly.
+            # If SAFETY, definitely return error.
+            if finish_reason == genai.types.FinishReason.SAFETY:
+                return None, error_msg # Return specific error for safety blocks
+            # For other reasons, maybe return partial, but safer to return error for now
+            # return response.text if hasattr(response, 'text') else None, error_msg # Optional: return partial text for RECITATION/OTHER
+            return None, f"Error: Gemini generation finished unexpectedly ({finish_reason.name})."
+
+        # Extract text
+        summary_text = response.text
+        if not summary_text or not summary_text.strip():
+            logger.warning("[Gemini Primary] Gemini returned an empty summary.")
+            return None, "Error: AI generated an empty summary."
+
+        logger.info(f"[Gemini Primary] Summary generated successfully (len: {len(summary_text)}).")
+        return summary_text.strip(), None
+
+    except Exception as e:
+        logger.error(f"[Gemini Primary] Error during API call to {GEMINI_MODEL}: {e}", exc_info=True)
+        # Check for specific Google API errors if needed
+        # from google.api_core import exceptions as google_exceptions
+        # if isinstance(e, google_exceptions.GoogleAPIError): ...
+        return None, f"Error: Failed to communicate with the primary AI service (Gemini). Details: {e}"
 
 
 async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
+    """ Calls the OpenRouter API to generate a summary. """
     global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
     if not _openrouter_fallback_enabled:
         logger.error("[OpenRouter Fallback] Called but is disabled.");
         return None, "Error: Fallback AI service (OpenRouter) not configured/available."
+
+    # OpenRouter models might have smaller context windows, truncate more aggressively if needed
+    # Example: 32k tokens ~ 120k chars. Deepseek is large though. Check model specifics if issues arise.
+    max_input_len_openrouter = 100000 # Adjust based on OPENROUTER_MODEL limits if known
+    if len(text) > max_input_len_openrouter:
+        logger.warning(f"[OpenRouter Fallback] Input text length ({len(text)}) exceeds approx limit ({max_input_len_openrouter}) for {OPENROUTER_MODEL}. Truncating.")
+        text = text[:max_input_len_openrouter]
+
     logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
-    # Define prompts (Keep existing prompts)
-    if summary_type == "paragraph": prompt = ("...") # Your existing paragraph prompt
-    else: prompt = ("...") # Your existing points prompt
-    # ... (rest of the OpenRouter call logic remains the same) ...
-    # Including length check, headers, payload, API call, response handling
 
+    # Define prompts (similar structure to Gemini)
+    if summary_type == "paragraph":
+        prompt_content = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information.
+
+Text:
+---
+{text}
+---
+
+Concise Paragraph Summary:"""
+    elif summary_type == "points":
+        prompt_content = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea.
+
+Text:
+---
+{text}
+---
+
+Key Bullet Points Summary:"""
+    else:
+        logger.error(f"[OpenRouter Fallback] Invalid summary_type: {summary_type}")
+        return None, f"Error: Invalid summary type '{summary_type}' specified."
+
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "Content-Type": "application/json",
+        "HTTP-Referer": "https://github.com/fmab777/telegram-summary-bot", # Optional: Identify your app
+        "X-Title": "Telegram Summary Bot", # Optional: Identify your app
+    }
+    payload = {
+        "model": OPENROUTER_MODEL,
+        "messages": [
+            {"role": "system", "content": "You are an expert summarizer. Provide summaries as requested."},
+            {"role": "user", "content": prompt_content}
+        ],
+        "max_tokens": 2048, # Adjust as needed
+        "temperature": 0.7,
+    }
+
+    api_url = "https://openrouter.ai/api/v1/chat/completions"
+
+    try:
+        async with httpx.AsyncClient(timeout=120.0) as client: # Longer timeout for potentially slower models
+            logger.debug(f"[OpenRouter Fallback] Sending request to {api_url} for model {OPENROUTER_MODEL}")
+            response = await client.post(api_url, headers=headers, json=payload)
+            logger.debug(f"[OpenRouter Fallback] Received status code {response.status_code}")
+
+            if response.status_code == 200:
+                try:
+                    data = response.json()
+                    if data.get("choices") and len(data["choices"]) > 0:
+                        message = data["choices"][0].get("message")
+                        if message and message.get("content"):
+                            summary_text = message["content"].strip()
+                            if summary_text:
+                                finish_reason = data["choices"][0].get("finish_reason", "N/A")
+                                logger.info(f"[OpenRouter Fallback] Summary generated successfully (len: {len(summary_text)}). Finish: {finish_reason}")
+                                # Check for length finish reason?
+                                if finish_reason == 'length':
+                                    logger.warning("[OpenRouter Fallback] Summary may be truncated due to max_tokens limit.")
+                                return summary_text, None
+                            else:
+                                logger.warning("[OpenRouter Fallback] OpenRouter returned an empty summary content.")
+                                return None, "Error: Fallback AI generated an empty summary."
+                        else:
+                            logger.error(f"[OpenRouter Fallback] Invalid response structure (missing message/content). Data: {data}")
+                            return None, "Error: Fallback AI returned an invalid response format."
+                    else:
+                        logger.error(f"[OpenRouter Fallback] Invalid response structure (missing choices). Data: {data}")
+                        # Check for error object in response
+                        api_error = data.get("error", {}).get("message", "Unknown API error")
+                        return None, f"Error: Fallback AI response missing summary. API msg: {api_error}"
+
+                except json.JSONDecodeError:
+                    logger.error(f"[OpenRouter Fallback] Failed to decode JSON response. Status: {response.status_code}, Text: {response.text[:500]}")
+                    return None, "Error: Fallback AI sent an invalid JSON response."
+                except Exception as e:
+                    logger.error(f"[OpenRouter Fallback] Error processing success response: {e}", exc_info=True)
+                    return None, f"Error: Failed to process Fallback AI response. Details: {e}"
+
+            else:
+                # Handle API errors (rate limits, auth, etc.)
+                error_message = f"Error: Fallback AI service ({OPENROUTER_MODEL}) returned status {response.status_code}."
+                try:
+                    error_details = response.json().get("error", {}).get("message", response.text[:200])
+                    error_message += f" Details: {error_details}"
+                except Exception:
+                    error_message += f" Response: {response.text[:200]}"
+                logger.error(f"[OpenRouter Fallback] {error_message}")
+                return None, error_message
+
+    except httpx.TimeoutException:
+        logger.error(f"[OpenRouter Fallback] Timeout connecting to OpenRouter API for {OPENROUTER_MODEL}")
+        return None, "Error: Timed out connecting to the fallback AI service."
+    except httpx.RequestError as e:
+        logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}")
+        return None, f"Error: Network error connecting to the fallback AI service. Details: {e}"
+    except Exception as e:
+        logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter API call: {e}", exc_info=True)
+        return None, f"Error: Unexpected issue with the fallback AI service. Details: {e}"
 
 
 async def generate_summary(text: str, summary_type: str) -> str:
+    """ Generates a summary using the primary AI (Gemini) and falling back to OpenRouter. """
     global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL
     logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})")
-    final_summary: Optional[str] = None
+    final_summary: Optional[str] = None
+    error_message: Optional[str] = None # Accumulates errors
+
+    # --- Attempt Primary AI (Gemini) ---
     if _gemini_primary_enabled:
         logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})")
+        primary_summary, primary_error = await _call_gemini(text, summary_type)
+        if primary_summary:
+            logger.info(f"[Summary Generation] Success with primary AI (Gemini).")
+            return primary_summary # Return successful primary summary immediately
+        else:
+            logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {primary_error}. Proceeding to fallback.")
+            error_message = f"Primary AI ({GEMINI_MODEL}) failed: {primary_error}" # Store primary error
     else:
         logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback.")
         error_message = "Primary AI (Gemini) unavailable."
 
+    # --- Attempt Fallback AI (OpenRouter) ---
     if _openrouter_fallback_enabled:
         logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})")
         fallback_summary, fallback_error = await _call_openrouter(text, summary_type)
-        if fallback_summary:
+        if fallback_summary:
+            logger.info(f"[Summary Generation] Success with fallback AI (OpenRouter).")
+            return fallback_summary # Return successful fallback summary
         else:
             logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error}")
+            # Combine errors for final message
+            if error_message: # If primary also failed
+                return f"{error_message}\nFallback AI ({OPENROUTER_MODEL}) also failed: {fallback_error}"
+            else: # Should not happen if logic is correct, but fallback just in case
+                return f"Fallback AI ({OPENROUTER_MODEL}) failed: {fallback_error}"
     else:
         logger.error("[Summary Generation] Fallback AI (OpenRouter) is disabled. Cannot proceed.")
-        if error_message:
+        if error_message: # Primary failed AND fallback disabled
+            return f"{error_message}\nFallback AI is also unavailable."
+        else: # Primary disabled AND fallback disabled
+            return "Error: Both primary and fallback AI services are unavailable."
 
+    # This part should ideally not be reached if the logic above is sound
+    logger.error("[Summary Generation] Reached end of function unexpectedly. No summary generated.")
+    final_error = error_message or "Unknown summary generation error."
+    return f"Sorry, an error occurred: {final_error}"
 
 
 # --- Main Processing Task ---
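(A quick, hedged sketch of driving the primary/fallback chain above on its own; it assumes at least one of GEMINI_API_KEY or OPENROUTER_API_KEY is configured so that a backend is enabled, and the sample text is purely illustrative.)

import asyncio
from main import generate_summary   # hypothetical import path for local testing

sample_text = "Long article text to be summarised..."   # placeholder input
result = asyncio.run(generate_summary(sample_text, "points"))
print(result)   # either the summary or one of the error strings returned above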
@@ -573,7 +820,8 @@ async def process_summary_task(
                 message_id=status_message_id,
                 text=processing_message_text,
                 parse_mode=ParseMode.HTML, # Use HTML for escaped URL
-                reply_markup=None
+                reply_markup=None,
+                link_preview_options={'is_disabled': True} # Disable preview here too
             )
             logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
         except Exception as e:
@@ -586,17 +834,16 @@ async def process_summary_task(
                 bot.send_message,
                 chat_id=chat_id,
                 text=processing_message_text,
-                parse_mode=ParseMode.HTML # Use HTML for escaped URL
+                parse_mode=ParseMode.HTML, # Use HTML for escaped URL
+                link_preview_options={'is_disabled': True}
             )
             if status_message:
                 message_to_delete_later_id = status_message.message_id
                 logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
             else:
-                # This should ideally be caught by retry_bot_operation raising an error
                 raise RuntimeError("Failed to send status message after retries.")
         except Exception as e:
             logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}")
-            # Don't raise here, try to continue if possible, but log critical failure
             user_feedback_message = "Sorry, there was an issue starting the process."
             # Attempt to send final feedback later if possible
 
@@ -626,7 +873,7 @@ async def process_summary_task(
             logger.warning(f"[Task {task_id}] Crawl4AI failed for {url}. Attempting BeautifulSoup (Fallback 1)...")
             try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
             except Exception: pass
-            content = await get_website_content_bs4(url)
+            content = await get_website_content_bs4(url)
 
         if not content:
             logger.warning(f"[Task {task_id}] BeautifulSoup also failed for {url}. Attempting API (Fallback 2)...")
@@ -636,11 +883,18 @@ async def process_summary_task(
                 except Exception: pass
                 content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
                 if not content:
+                    # Check if the specific error was insufficient credits
+                    # Note: get_website_content_via_api already logs the specific error
+                    logger.error(f"[Task {task_id}] API fallback (urltotext) also failed for {url}.")
+                    user_feedback_message = "Sorry, I couldn't fetch content from that website using any method (Crawl4AI/BS4 failed, API failed or ran out of credits)." # Updated message
             else:
                 logger.warning(f"[Task {task_id}] API fallback is disabled. Cannot attempt Fallback 2.")
-                user_feedback_message = "Sorry, I couldn't fetch content from that website using
+                user_feedback_message = "Sorry, I couldn't fetch content from that website using Crawl4AI or BeautifulSoup, and the API fallback is not enabled." # Updated message
+
+        # Final check if all web methods failed
+        if not content and not user_feedback_message:
+            logger.error(f"[Task {task_id}] All website fetching methods seem to have failed without setting a specific user message.")
+            user_feedback_message = "Sorry, I couldn't fetch content from that website using any available method (blocked/inaccessible/empty?)."
 
 
         # --- Generate Summary if Content was Fetched ---
@@ -656,73 +910,107 @@ async def process_summary_task(
                 logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
             else:
                 # Success - Send the summary
-                max_length = 4096 # Telegram's message length limit
                 summary_parts = []
                 current_part = ""
-                # Split respecting
+                # Split respecting newlines, ensure no part exceeds MAX_SUMMARY_CHUNK_SIZE
+                lines = final_summary.splitlines(keepends=True)
+                for line in lines:
+                    # If adding the next line exceeds the limit, finalize the current part
+                    if len(current_part) + len(line) > MAX_SUMMARY_CHUNK_SIZE:
+                        if current_part.strip(): # Don't add empty parts
+                            summary_parts.append(current_part.strip())
+                        current_part = line # Start new part with the current line
+                        # If a single line itself is too long, truncate it (edge case)
+                        if len(current_part) > MAX_SUMMARY_CHUNK_SIZE:
+                            logger.warning(f"[Task {task_id}] Truncating overly long line in summary.")
+                            current_part = current_part[:MAX_SUMMARY_CHUNK_SIZE]
                     else:
+                        current_part += line
+
+                # Add the last part if it has content
+                if current_part.strip():
                     summary_parts.append(current_part.strip())
 
+                # If somehow splitting resulted in nothing (e.g., empty summary initially?)
+                if not summary_parts:
+                    summary_parts.append("Summary generated, but it appears to be empty.")
+                    logger.warning(f"[Task {task_id}] Summary was non-empty initially but splitting resulted in zero parts.")
+
 
+                logger.info(f"[Task {task_id}] Summary generated (orig len: {len(final_summary)}). Sending in {len(summary_parts)} part(s).")
+
+                # Determine the target message ID for the *first* part
+                # Prefer editing the "Processing..." message if we sent a new one
+                edit_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
                 message_sent = False
+
+                if edit_target_id:
                     try:
                         # Try editing the status message first
                         await retry_bot_operation(
                             bot.edit_message_text,
                             chat_id=chat_id,
-                            message_id=
+                            message_id=edit_target_id,
                             text=summary_parts[0],
                             parse_mode=None, # Send as plain text initially, safer
                             link_preview_options={'is_disabled': True}
                         )
-                        logger.debug(f"[Task {task_id}] Edited message {
-                        # Prevent this message from being deleted later
-                        if message_to_delete_later_id ==
+                        logger.debug(f"[Task {task_id}] Edited message {edit_target_id} with first summary part.")
+                        # Prevent this message from being deleted later if it was the 'Processing...' one
+                        if message_to_delete_later_id == edit_target_id: message_to_delete_later_id = None
+                        # If it was the *original* button message that we are editing, keep status_message_id
+                        # so we know *not* to delete it in finally block if it's the only message left.
+                        # However, it's clearer to just prevent deletion if edited.
+                        if status_message_id == edit_target_id: status_message_id = None # Mark as handled
+
                         message_sent = True
                     except Exception as edit_err:
-                        logger.warning(f"[Task {task_id}] Failed to edit message {
+                        logger.warning(f"[Task {task_id}] Failed to edit message {edit_target_id} with summary: {edit_err}. Sending new message instead.")
                         # If edit fails, fall through to send a new message
 
                 if not message_sent:
+                    # Send the first part as a new message
+                    sent_msg = await retry_bot_operation(
                         bot.send_message,
                         chat_id=chat_id,
                         text=summary_parts[0],
                         parse_mode=None,
                         link_preview_options={'is_disabled': True}
+                    )
+                    if sent_msg:
+                        logger.debug(f"[Task {task_id}] Sent first summary part as new message {sent_msg.message_id}.")
+                    else: # Should be caught by retry, but log defensively
+                        logger.error(f"[Task {task_id}] Failed to send first summary part even as new message.")
+                        user_feedback_message = "Sorry, failed to send the summary." # Set error
+
+
+                # Send remaining parts (if any and first part succeeded)
+                if not user_feedback_message and len(summary_parts) > 1:
+                    for i, part in enumerate(summary_parts[1:], start=2):
+                        await asyncio.sleep(0.5) # Small delay between parts
+                        try:
+                            await retry_bot_operation(
+                                bot.send_message,
+                                chat_id=chat_id,
+                                text=part,
+                                parse_mode=None,
+                                link_preview_options={'is_disabled': True}
+                            )
+                            logger.debug(f"[Task {task_id}] Sent summary part {i}/{len(summary_parts)}.")
+                        except Exception as part_err:
+                            logger.error(f"[Task {task_id}] Failed to send summary part {i}: {part_err}")
+                            user_feedback_message = f"Sorry, failed to send part {i} of the summary."
+                            # Should we stop sending further parts? Yes.
+                            break # Stop sending remaining parts
+
+                # Determine overall success based on whether feedback message is set
+                if not user_feedback_message:
+                    success = True
+                    # user_feedback_message = None # Clear feedback message ONLY on full success
 
         # --- Handle Cases Where No Content Was Fetched or Summary Failed ---
-        if user_feedback_message:
-            logger.warning(f"[Task {task_id}] Sending failure feedback to user: {user_feedback_message}")
+        if user_feedback_message: # Check if any error occurred
+            logger.warning(f"[Task {task_id}] Sending failure/error feedback to user: {user_feedback_message}")
         try:
             # Try editing the status message first
             feedback_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
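(The splitting loop above is easier to follow as a standalone helper; this is a sketch of equivalent logic under the same MAX_SUMMARY_CHUNK_SIZE idea, not code taken from the diff.)

def split_for_telegram(summary: str, limit: int = 4000) -> list:
    """Split a summary into chunks below Telegram's message limit, preferring newline boundaries."""
    parts, current = [], ""
    for line in summary.splitlines(keepends=True):
        if len(current) + len(line) > limit:
            if current.strip():
                parts.append(current.strip())
            current = line[:limit]   # start a new part; truncate a single overlong line
        else:
            current += line
    if current.strip():
        parts.append(current.strip())
    return parts or ["Summary generated, but it appears to be empty."]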
@@ -738,8 +1026,9 @@ async def process_summary_task(
                 reply_markup=None # Remove buttons
             )
             logger.debug(f"[Task {task_id}] Edited message {feedback_target_id} with failure feedback.")
+            # Prevent deletion if edited
             if message_to_delete_later_id == feedback_target_id: message_to_delete_later_id = None
+            if status_message_id == feedback_target_id: status_message_id = None
             message_sent = True
         except Exception as edit_err:
             logger.warning(f"[Task {task_id}] Failed to edit message {feedback_target_id} with failure feedback: {edit_err}. Sending new message instead.")
@@ -759,27 +1048,34 @@ async def process_summary_task(
         # Catch-all for unexpected errors during the main processing logic
         logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
         user_feedback_message = "Oops! Something went wrong while processing your request. Please try again later."
+        if bot: # Ensure bot exists before trying to send
+            try:
+                # Attempt to send a final error message
+                await retry_bot_operation(
+                    bot.send_message,
+                    chat_id=chat_id,
+                    text=user_feedback_message
+                )
+            except Exception as final_err:
+                logger.error(f"[Task {task_id}] Failed to send the final unexpected error feedback: {final_err}")
 
     finally:
         # --- Cleanup ---
-        # Delete the "Processing..."
-        if delete_target_id and bot:
+        # Delete the temporary "Processing..." message if it exists and wasn't edited/handled
+        if message_to_delete_later_id and bot:
             try:
-                await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=
-                logger.debug(f"[Task {task_id}] Deleted
             except Exception as del_e:
 
         # Close the background bot's HTTP client
         if background_request and hasattr(background_request, '_client') and background_request._client:
@@ -794,8 +1090,7 @@
 
 # --- Telegram Handlers ---
 # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler)
-# These
-# The core logic change is within process_summary_task.
 
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     # ... (Keep existing implementation) ...
@@ -809,15 +1104,16 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     user = update.effective_user
     if not user or not update.message: return
     logger.info(f"User {user.id} ({user.username or 'N/A'}) used /help.")
     help_text = ( "π **How to use:**\n\n"
                   "1. Send me any YouTube video link or website URL.\n"
                   "2. I'll ask how you want it summarised (paragraph or points).\n"
                   "3. Click the button for your choice.\n"
                   "4. Wait for the summary!\n\n"
                   "βοΈ **Behind the scenes:**\n"
-                  "β’ **Websites:** I
-                  "β’ **YouTube:** I use `youtube-transcript-api` first, then fall back to `Supadata` and `Apify` APIs if
-                  "β’ **Summaries:** Generated using Google `
                   "**Commands:**\n"
                   "`/start` - Display welcome message\n"
                   "`/help` - Show this help message" )
@@ -830,10 +1126,9 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     if not user: return
     # Basic URL validation
     if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
-        # Maybe add a reply here? "Please send a valid URL starting with http:// or https://"
         logger.debug(f"Ignoring non-URL from {user.id}: {url}")
         # Optionally reply to the user that it doesn't look like a valid URL
         return
     logger.info(f"User {user.id} ({user.username or 'N/A'}) sent potential URL: {url}")
     # Store URL and original message ID in user_data
@@ -850,7 +1145,7 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
     user = query.from_user; summary_type = query.data; query_id = query.id
     try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id} ({user.username or 'N/A'})")
-    except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
 
     url = context.user_data.get('url_to_summarize')
     message_id_to_edit = query.message.message_id # The message with the buttons
@@ -859,31 +1154,27 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     if not url:
         logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Expired?")
         try:
-            # Edit the message where the button was clicked
             await query.edit_message_text(text="Sorry, I seem to have lost the context for that link. π€ Please send the URL again.", reply_markup=None)
         except BadRequest as e:
             if "message is not modified" in str(e).lower(): pass # Ignore if text is the same
             else: logger.error(f"Failed edit 'URL not found' msg: {e}")
-        except Exception as e:
-            # Do not proceed further
-            return
 
-    # Clear context *after* successfully
-    # context.user_data.pop('url_to_summarize', None)
-    # context.user_data.pop('original_message_id', None)
-    # logger.debug(f"Cleared URL context for user {user.id}") # Moved clearing to after task creation
 
     # Check necessary configurations before scheduling
     global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
     if not TELEGRAM_TOKEN:
         logger.critical("TG TOKEN missing! Cannot schedule task.")
-        try: await query.edit_message_text(text="β Bot configuration error (Token Missing). Cannot proceed.")
         except Exception: pass
         return
     if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
         logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid! Cannot summarize.")
-        try: await query.edit_message_text(text="β AI configuration error: No summarization models are available. Cannot proceed.")
         except Exception: pass
         return
     # Log warnings if one model is missing, but proceed if at least one is available
@@ -899,23 +1190,34 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
             message_id_to_edit=message_id_to_edit, # Pass the ID of the message with buttons
             url=url,
             summary_type=summary_type,
-            bot_token=TELEGRAM_TOKEN
         ),
         name=f"SummaryTask-{user.id}-{message_id_to_edit}"
     )
 
-    # Clear context AFTER scheduling the task to prevent race conditions
     context.user_data.pop('url_to_summarize', None)
     context.user_data.pop('original_message_id', None)
    logger.debug(f"Cleared URL context for user {user.id} after scheduling task.")
 
 async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
     # ... (Keep existing implementation) ...
     if isinstance(context.error, ignore_errors):
-        ignore_messages = ["message is not modified", "query is too old", "message to edit not found"]
         return
     logger.error("Exception while handling an update:", exc_info=context.error)
     # Consider notifying the user about unexpected errors if appropriate and possible
@@ -935,7 +1237,9 @@ async def setup_bot_config() -> Application:
     # Add Handlers
     application.add_handler(CommandHandler("start", start))
     application.add_handler(CommandHandler("help", help_command))
     application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
     # Error Handler
     application.add_error_handler(error_handler)
@@ -1004,8 +1308,12 @@ async def lifespan(app: Starlette):
         logger.error(f"FATAL: Failed to set webhook: {e}", exc_info=True)
         raise RuntimeError(f"Failed to set webhook: {e}") from e
     else:
 
     logger.info("ASGI Lifespan: Startup complete.");
     yield # Application runs here
@@ -1040,30 +1348,38 @@ async def lifespan(app: Starlette):
 
 
 async def health_check(request: Request) -> PlainTextResponse:
-    # ... (Keep existing implementation,
-    global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _urltotext_fallback_enabled
     bot_status = "Not Initialized"
     bot_username = "N/A"
-    if ptb_app and ptb_app.bot:
         try:
             bot_info = await ptb_app.bot.get_me()
             bot_username = f"@{bot_info.username}"
-            bot_status = f"Running ({bot_username})"
         else: bot_status = "Initialized/Not running"
         except Exception as e: bot_status = f"Error checking status: {e}"
 
     health_info = [
-        f"
-        f"
        f"Fallback Web Scraper 1: BeautifulSoup",
-        f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_enabled else '
-        f"Primary Summarizer: {'Gemini (' + GEMINI_MODEL + ')' if _gemini_primary_enabled else '
-        f"Fallback Summarizer: {'OpenRouter (' + OPENROUTER_MODEL + ')' if _openrouter_fallback_enabled else '
        f"Primary YT Transcript: youtube-transcript-api",
-        f"Fallback YT Transcript 1: {'Supadata API' if SUPADATA_API_KEY else '
-        f"Fallback YT Transcript 2: {'Apify (' + APIFY_ACTOR_ID + ')' if _apify_token_exists else '
    ]
    return PlainTextResponse("\n".join(health_info))
 
@@ -1124,6 +1440,18 @@ if __name__ == '__main__':
     log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
     # Use the PORT env var for local running too, defaulting to 8080
     local_port = int(os.environ.get('PORT', 8080))
     uvicorn.run(
         "main:app",
         host='0.0.0.0',
53 |
# --- Google Gemini ---
|
54 |
try:
|
55 |
import google.generativeai as genai
|
56 |
+
from google.generativeai.types import HarmCategory, HarmBlockThreshold, GenerateContentResponse
|
57 |
_gemini_available = True
|
58 |
except ImportError:
|
59 |
genai = None
|
60 |
HarmCategory = None
|
61 |
HarmBlockThreshold = None
|
62 |
+
GenerateContentResponse = None # Add this for type hinting if needed
|
63 |
_gemini_available = False
|
64 |
# logger defined later
|
65 |
|
|
|
112 |
# Models (User can still configure via env vars)
|
113 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
|
114 |
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
|
115 |
+
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash-latest") # Use the 1.5 flash model directly
|
116 |
+
# Using gemini-1.5-flash-latest is generally recommended over gemini-2.0-flash-001
|
117 |
|
118 |
# --- Configuration Checks ---
|
119 |
if not TELEGRAM_TOKEN: logger.critical("β FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
|
|
157 |
logger.error(f"Failed to configure Google GenAI client: {e}")
|
158 |
_gemini_primary_enabled = False
|
159 |
|
160 |
+
# --- Constants ---
|
161 |
+
MAX_SUMMARY_CHUNK_SIZE = 4000 # Max characters per Telegram message (allow buffer)
|
162 |
+
MAX_INPUT_TOKEN_APPROX = 1000000 # Gemini 1.5 Flash context window (approx chars) - adjust if needed
|
163 |
+
|
164 |
# --- Retry Decorator ---
|
165 |
# (Remains the same)
|
166 |
@retry(
|
|
|
339 |
return None
|
340 |
|
341 |
logger.info(f"[Crawl4AI Primary] Attempting to crawl URL: {url}")
|
342 |
+
# Define a writable cache directory (use /tmp in container environments)
|
343 |
+
# Create the directory path beforehand to avoid potential race conditions or permission issues within the library
|
344 |
+
cache_dir_path = "/tmp/.crawl4ai" # CHANGED: Use /tmp
|
345 |
+
try:
|
346 |
+
os.makedirs(cache_dir_path, exist_ok=True)
|
347 |
+
logger.info(f"[Crawl4AI Primary] Ensured cache directory exists: {cache_dir_path}")
|
348 |
+
except OSError as e:
|
349 |
+
logger.error(f"[Crawl4AI Primary] Failed to create cache directory {cache_dir_path}: {e}. Crawl may fail.")
|
350 |
+
# Don't return here, let the crawler try anyway, it might handle it internally or use default
|
351 |
+
except Exception as e:
|
352 |
+
logger.error(f"[Crawl4AI Primary] Unexpected error creating cache directory {cache_dir_path}: {e}")
|
353 |
+
|
354 |
+
|
355 |
try:
|
356 |
+
# Use AsyncWebCrawler context manager with explicit cache_dir
|
357 |
+
# NOTE: Pass cache_dir here if the library supports it via __init__ or a config object
|
358 |
+
# Checking crawl4ai docs/source, AsyncWebCrawler doesn't directly take cache_dir in __init__.
|
359 |
+
# It seems to rely on environment variables or default home resolution.
|
360 |
+
# The PermissionError happens in RobotsParser -> get_home_folder -> os.makedirs.
|
361 |
+
# WORKAROUND: We might need to adjust the environment or hope setting HOME=/app in Dockerfile is enough
|
362 |
+
# *if* the library correctly uses HOME. Let's test *without* explicit cache_dir first,
|
363 |
+
# relying on HOME=/app and the prior os.makedirs call. If it still fails, we need a different approach.
|
364 |
+
|
365 |
+
# UPDATE: The traceback shows it uses utils.get_home_folder(). Let's stick with HOME=/app for now
|
366 |
+
# and see if the permission error was transient or specific to the '.models' subdir.
|
367 |
+
# If it persists, we might need to fork/modify crawl4ai or find another way to configure its paths.
|
368 |
+
|
369 |
+
# Let's *try* passing cache_dir anyway, maybe it's an undocumented/newer feature
|
370 |
+
async with AsyncWebCrawler(cache_dir=cache_dir_path) as crawler: # TRY passing cache_dir
|
371 |
+
logger.info(f"[Crawl4AI Primary] Initialized with explicit cache_dir: {cache_dir_path}")
|
372 |
# Use arun for a single URL crawl
|
|
|
|
|
373 |
result = await crawler.arun(url=url, crawler_strategy="playwright", timeout=90) # 90 sec timeout
|
374 |
|
375 |
if result and result.markdown:
|
|
|
394 |
except asyncio.TimeoutError:
|
395 |
logger.error(f"[Crawl4AI Primary] Timeout occurred while crawling {url}")
|
396 |
return None
|
397 |
+
except PermissionError as e: # Catch the specific error
|
398 |
+
logger.error(f"[Crawl4AI Primary] Permission denied during crawl for {url}. Likely filesystem issue in container. Error: {e}", exc_info=True)
|
399 |
+
return None # Fail gracefully for this method
|
400 |
except Exception as e:
|
401 |
+
# Log type error if cache_dir isn't accepted
|
402 |
+
if "unexpected keyword argument 'cache_dir'" in str(e):
|
403 |
+
logger.error(f"[Crawl4AI Primary] AsyncWebCrawler does not accept 'cache_dir'. Remove this argument. Error: {e}")
|
404 |
+
else:
|
405 |
+
logger.error(f"[Crawl4AI Primary] Unexpected error during crawl for {url}: {e}", exc_info=True)
|
406 |
return None
|
407 |
|
408 |
|
|
|
418 |
response.raise_for_status()
|
419 |
content_type = response.headers.get('content-type', '').lower()
|
420 |
if 'html' not in content_type: logger.warning(f"[Web Scrape BS4] Non-HTML content type from {url}: {content_type}"); return None
|
421 |
+
try: return response.text # Use response.text to let httpx handle decoding
|
422 |
+
except Exception as e: logger.error(f"[Web Scrape BS4] Error getting response text for {url}: {e}"); return None
|
423 |
except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape BS4] HTTP error {e.response.status_code} fetching {url}: {e}")
|
424 |
except httpx.TimeoutException: logger.error(f"[Web Scrape BS4] Timeout error fetching {url}")
|
425 |
except httpx.TooManyRedirects: logger.error(f"[Web Scrape BS4] Too many redirects fetching {url}")
|
|
|
429 |
|
430 |
async def get_website_content_bs4(url: str) -> Optional[str]:
|
431 |
"""Fetches and parses website content using BeautifulSoup (Fallback 1)."""
|
432 |
+
# ... (Keep existing implementation) ...
|
433 |
if not url: logger.error("[BS4 Fallback] get_website_content_bs4: No URL"); return None
|
434 |
logger.info(f"[BS4 Fallback] Attempting basic fetch & parse for: {url}")
|
435 |
html_content = await fetch_url_content_for_scrape(url)
|
|
|
442 |
def parse_html(content):
|
443 |
soup = BeautifulSoup(content, DEFAULT_PARSER)
|
444 |
# Remove common non-content elements
|
445 |
+
for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio"]):
|
446 |
element.extract()
|
447 |
+
# Try to find main content areas more broadly
|
448 |
+
selectors = ['main', 'article', '[role="main"]', '#content', '.content', '#main-content', '.main-content', '#body', '.body', '#article-body', '.article-body']
|
449 |
+
target_element = None
|
450 |
+
for selector in selectors:
|
451 |
+
target_element = soup.select_one(selector)
|
452 |
+
if target_element: break
|
453 |
+
|
454 |
+
if not target_element: target_element = soup.body # Fallback to body
|
455 |
+
if not target_element: logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}"); return None
|
456 |
+
|
457 |
+
# Extract text, clean up whitespace aggressively
|
458 |
lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
|
459 |
+
text = " ".join(lines) # Join lines with spaces
|
460 |
+
|
461 |
+
# Basic post-cleaning
|
462 |
+
text = re.sub(r'\s{2,}', ' ', text).strip() # Replace multiple spaces with single space
|
463 |
+
|
464 |
+
if not text: logger.warning(f"[BS4 Fallback] Extracted text is empty after cleaning for {url}"); return None
|
465 |
return text
|
466 |
|
467 |
text_content = await asyncio.to_thread(parse_html, html_content)
|
|
|
478 |
# Fallback 2: urltotext.com API
|
479 |
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
480 |
"""Fetches website content using urltotext.com API (Fallback 2)."""
|
481 |
+
# ... (Keep existing implementation) ...
|
482 |
if not url: logger.error("[API Fallback] No URL"); return None
|
483 |
if not api_key: logger.error("[API Fallback] urltotext.com API key missing."); return None
|
484 |
logger.info(f"[API Fallback] Attempting fetch for: {url} using urltotext.com API")
|
|
|
499 |
else: logger.warning(f"[API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
|
500 |
except json.JSONDecodeError: logger.error(f"[API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
|
501 |
except Exception as e: logger.error(f"[API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
|
502 |
+
elif response.status_code == 402: # Specifically handle insufficient credits
|
503 |
+
logger.error(f"[API Fallback] Error 402 (Insufficient Credits) from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
|
504 |
+
elif response.status_code in [400, 401, 403, 422, 500]: logger.error(f"[API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
|
505 |
else: logger.error(f"[API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
|
506 |
except httpx.TimeoutException: logger.error(f"[API Fallback] Timeout connecting to urltotext.com API for {url}"); return None
|
507 |
except httpx.RequestError as e: logger.error(f"[API Fallback] Request error connecting to urltotext.com API for {url}: {e}"); return None
|
508 |
except Exception as e: logger.error(f"[API Fallback] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
509 |
|
510 |
# --- Summarization Functions ---
|
|
|
511 |
async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
|
512 |
+
""" Calls the Google Gemini API to generate a summary. """
|
513 |
global GEMINI_MODEL, _gemini_primary_enabled
|
514 |
if not _gemini_primary_enabled:
|
515 |
logger.error("[Gemini Primary] Called but is disabled.");
|
516 |
return None, "Error: Primary AI service (Gemini) not configured/available."
|
517 |
+
|
518 |
+
# Truncate input text if it exceeds the approximate limit
|
519 |
+
if len(text) > MAX_INPUT_TOKEN_APPROX:
|
520 |
+
logger.warning(f"[Gemini Primary] Input text length ({len(text)}) exceeds limit ({MAX_INPUT_TOKEN_APPROX}). Truncating.")
|
521 |
+
text = text[:MAX_INPUT_TOKEN_APPROX]
|
522 |
+
|
523 |
logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
|
|
|
|
|
|
|
|
|
|
|
524 |
|
525 |
+
# Define prompts
|
526 |
+
if summary_type == "paragraph":
|
527 |
+
prompt = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information. Avoid unnecessary jargon or overly complex sentences.
|
528 |
+
|
529 |
+
Text to summarise:
|
530 |
+
---
|
531 |
+
{text}
|
532 |
+
---
|
533 |
+
|
534 |
+
Concise Paragraph Summary:"""
|
535 |
+
elif summary_type == "points":
|
536 |
+
prompt = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea or important piece of information. Aim for clarity and conciseness.
|
537 |
+
|
538 |
+
Text to summarise:
|
539 |
+
---
|
540 |
+
{text}
|
541 |
+
---
|
542 |
+
|
543 |
+
Key Bullet Points Summary:"""
|
544 |
+
else:
|
545 |
+
logger.error(f"[Gemini Primary] Invalid summary_type: {summary_type}")
|
546 |
+
return None, f"Error: Invalid summary type '{summary_type}' specified."
|
547 |
+
|
548 |
+
# Configure safety settings (adjust as needed)
|
549 |
+
safety_settings = {
|
550 |
+
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
551 |
+
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
552 |
+
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
553 |
+
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
554 |
+
}
|
555 |
+
|
556 |
+
# Configure generation settings (optional)
|
557 |
+
generation_config = genai.types.GenerationConfig(
|
558 |
+
# candidate_count=1, # Default is 1
|
559 |
+
# stop_sequences=["\n"],
|
560 |
+
max_output_tokens=2048, # Increased max tokens for potentially longer summaries from large inputs
|
561 |
+
temperature=0.7, # Adjust creativity vs factualness
|
562 |
+
# top_p=1.0, # Default
|
563 |
+
# top_k=None # Default
|
564 |
+
)
|
565 |
+
|
566 |
+
try:
|
567 |
+
model = genai.GenerativeModel(GEMINI_MODEL)
|
568 |
+
logger.debug(f"[Gemini Primary] Sending request to model {GEMINI_MODEL}")
|
569 |
+
response: GenerateContentResponse = await model.generate_content_async( # Use async version
|
570 |
+
prompt,
|
571 |
+
generation_config=generation_config,
|
572 |
+
safety_settings=safety_settings
|
573 |
+
)
|
574 |
+
logger.debug(f"[Gemini Primary] Received response. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}")
|
575 |
+
|
576 |
+
# Check for safety blocks or other issues in response
|
577 |
+
if not response.candidates:
|
578 |
+
block_reason = response.prompt_feedback.block_reason if hasattr(response, 'prompt_feedback') else 'Unknown'
|
579 |
+
error_msg = f"Error: Gemini response blocked or empty. Reason: {block_reason}"
|
580 |
+
logger.error(f"[Gemini Primary] {error_msg}")
|
581 |
+
return None, error_msg
|
582 |
+
|
583 |
+
# Check finish reason (e.g., MAX_TOKENS, SAFETY)
|
584 |
+
finish_reason = response.candidates[0].finish_reason
|
585 |
+
if finish_reason != genai.types.FinishReason.STOP and finish_reason != genai.types.FinishReason.MAX_TOKENS:
|
586 |
+
# Log safety ratings if available
|
587 |
+
safety_ratings_str = "N/A"
|
588 |
+
if hasattr(response.candidates[0], 'safety_ratings'):
|
589 |
+
safety_ratings_str = ', '.join([f"{r.category.name}: {r.probability.name}" for r in response.candidates[0].safety_ratings])
|
590 |
+
error_msg = f"Error: Gemini generation finished unexpectedly. Reason: {finish_reason.name}. Safety: {safety_ratings_str}"
|
591 |
+
logger.error(f"[Gemini Primary] {error_msg}")
|
592 |
+
# Return partial text if available and finish reason is MAX_TOKENS? Maybe not, could be truncated badly.
|
593 |
+
# If SAFETY, definitely return error.
|
594 |
+
if finish_reason == genai.types.FinishReason.SAFETY:
|
595 |
+
return None, error_msg # Return specific error for safety blocks
|
596 |
+
# For other reasons, maybe return partial, but safer to return error for now
|
597 |
+
# return response.text if hasattr(response, 'text') else None, error_msg # Optional: return partial text for RECITATION/OTHER
|
598 |
+
return None, f"Error: Gemini generation finished unexpectedly ({finish_reason.name})."
|
599 |
+
|
600 |
+
|
601 |
+
# Extract text
|
602 |
+
summary_text = response.text
|
603 |
+
if not summary_text or not summary_text.strip():
|
604 |
+
logger.warning("[Gemini Primary] Gemini returned an empty summary.")
|
605 |
+
return None, "Error: AI generated an empty summary."
|
606 |
+
|
607 |
+
logger.info(f"[Gemini Primary] Summary generated successfully (len: {len(summary_text)}).")
|
608 |
+
return summary_text.strip(), None
|
609 |
+
|
610 |
+
except Exception as e:
|
611 |
+
logger.error(f"[Gemini Primary] Error during API call to {GEMINI_MODEL}: {e}", exc_info=True)
|
612 |
+
# Check for specific Google API errors if needed
|
613 |
+
# from google.api_core import exceptions as google_exceptions
|
614 |
+
# if isinstance(e, google_exceptions.GoogleAPIError): ...
|
615 |
+
return None, f"Error: Failed to communicate with the primary AI service (Gemini). Details: {e}"
|
616 |
|
617 |
|
618 |
async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
|
619 |
+
""" Calls the OpenRouter API to generate a summary. """
|
620 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
|
621 |
if not _openrouter_fallback_enabled:
|
622 |
logger.error("[OpenRouter Fallback] Called but is disabled.");
|
623 |
return None, "Error: Fallback AI service (OpenRouter) not configured/available."
|
624 |
+
|
625 |
+
# OpenRouter models might have smaller context windows, truncate more aggressively if needed
|
626 |
+
# Example: 32k tokens ~ 120k chars. Deepseek is large though. Check model specifics if issues arise.
|
627 |
+
max_input_len_openrouter = 100000 # Adjust based on OPENROUTER_MODEL limits if known
|
628 |
+
if len(text) > max_input_len_openrouter:
|
629 |
+
logger.warning(f"[OpenRouter Fallback] Input text length ({len(text)}) exceeds approx limit ({max_input_len_openrouter}) for {OPENROUTER_MODEL}. Truncating.")
|
630 |
+
text = text[:max_input_len_openrouter]
|
631 |
+
|
632 |
logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
|
|
|
|
|
|
|
|
|
|
633 |
|
634 |
+
# Define prompts (similar structure to Gemini)
|
635 |
+
if summary_type == "paragraph":
|
636 |
+
prompt_content = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information.
|
637 |
+
|
638 |
+
Text:
|
639 |
+
---
|
640 |
+
{text}
|
641 |
+
---
|
642 |
+
|
643 |
+
Concise Paragraph Summary:"""
|
644 |
+
elif summary_type == "points":
|
645 |
+
prompt_content = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea.
|
646 |
+
|
647 |
+
Text:
|
648 |
+
---
|
649 |
+
{text}
|
650 |
+
---
|
651 |
+
|
652 |
+
Key Bullet Points Summary:"""
|
653 |
+
else:
|
654 |
+
logger.error(f"[OpenRouter Fallback] Invalid summary_type: {summary_type}")
|
655 |
+
return None, f"Error: Invalid summary type '{summary_type}' specified."
|
656 |
+
|
657 |
+
headers = {
|
658 |
+
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
659 |
+
"Content-Type": "application/json",
|
660 |
+
"HTTP-Referer": "https://github.com/fmab777/telegram-summary-bot", # Optional: Identify your app
|
661 |
+
"X-Title": "Telegram Summary Bot", # Optional: Identify your app
|
662 |
+
}
|
663 |
+
payload = {
|
664 |
+
"model": OPENROUTER_MODEL,
|
665 |
+
"messages": [
|
666 |
+
{"role": "system", "content": "You are an expert summarizer. Provide summaries as requested."},
|
667 |
+
{"role": "user", "content": prompt_content}
|
668 |
+
],
|
669 |
+
"max_tokens": 2048, # Adjust as needed
|
670 |
+
"temperature": 0.7,
|
671 |
+
}
|
672 |
+
|
673 |
+
api_url = "https://openrouter.ai/api/v1/chat/completions"
|
674 |
+
|
675 |
+
try:
|
676 |
+
async with httpx.AsyncClient(timeout=120.0) as client: # Longer timeout for potentially slower models
|
677 |
+
logger.debug(f"[OpenRouter Fallback] Sending request to {api_url} for model {OPENROUTER_MODEL}")
|
678 |
+
response = await client.post(api_url, headers=headers, json=payload)
|
679 |
+
logger.debug(f"[OpenRouter Fallback] Received status code {response.status_code}")
|
680 |
+
|
681 |
+
if response.status_code == 200:
|
682 |
+
try:
|
683 |
+
data = response.json()
|
684 |
+
if data.get("choices") and len(data["choices"]) > 0:
|
685 |
+
message = data["choices"][0].get("message")
|
686 |
+
if message and message.get("content"):
|
687 |
+
summary_text = message["content"].strip()
|
688 |
+
if summary_text:
|
689 |
+
finish_reason = data["choices"][0].get("finish_reason", "N/A")
|
690 |
+
logger.info(f"[OpenRouter Fallback] Summary generated successfully (len: {len(summary_text)}). Finish: {finish_reason}")
|
691 |
+
# Check for length finish reason?
|
692 |
+
if finish_reason == 'length':
|
693 |
+
logger.warning("[OpenRouter Fallback] Summary may be truncated due to max_tokens limit.")
|
694 |
+
return summary_text, None
|
695 |
+
else:
|
696 |
+
logger.warning("[OpenRouter Fallback] OpenRouter returned an empty summary content.")
|
697 |
+
return None, "Error: Fallback AI generated an empty summary."
|
698 |
+
else:
|
699 |
+
logger.error(f"[OpenRouter Fallback] Invalid response structure (missing message/content). Data: {data}")
|
700 |
+
return None, "Error: Fallback AI returned an invalid response format."
|
701 |
+
else:
|
702 |
+
logger.error(f"[OpenRouter Fallback] Invalid response structure (missing choices). Data: {data}")
|
703 |
+
# Check for error object in response
|
704 |
+
api_error = data.get("error", {}).get("message", "Unknown API error")
|
705 |
+
return None, f"Error: Fallback AI response missing summary. API msg: {api_error}"
|
706 |
+
|
707 |
+
except json.JSONDecodeError:
|
708 |
+
logger.error(f"[OpenRouter Fallback] Failed to decode JSON response. Status: {response.status_code}, Text: {response.text[:500]}")
|
709 |
+
return None, "Error: Fallback AI sent an invalid JSON response."
|
710 |
+
except Exception as e:
|
711 |
+
logger.error(f"[OpenRouter Fallback] Error processing success response: {e}", exc_info=True)
|
712 |
+
return None, f"Error: Failed to process Fallback AI response. Details: {e}"
|
713 |
+
|
714 |
+
else:
|
715 |
+
# Handle API errors (rate limits, auth, etc.)
|
716 |
+
error_message = f"Error: Fallback AI service ({OPENROUTER_MODEL}) returned status {response.status_code}."
|
717 |
+
try:
|
718 |
+
error_details = response.json().get("error", {}).get("message", response.text[:200])
|
719 |
+
error_message += f" Details: {error_details}"
|
720 |
+
except Exception:
|
721 |
+
error_message += f" Response: {response.text[:200]}"
|
722 |
+
logger.error(f"[OpenRouter Fallback] {error_message}")
|
723 |
+
return None, error_message
|
724 |
+
|
725 |
+
except httpx.TimeoutException:
|
726 |
+
logger.error(f"[OpenRouter Fallback] Timeout connecting to OpenRouter API for {OPENROUTER_MODEL}")
|
727 |
+
return None, "Error: Timed out connecting to the fallback AI service."
|
728 |
+
except httpx.RequestError as e:
|
729 |
+
logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}")
|
730 |
+
return None, f"Error: Network error connecting to the fallback AI service. Details: {e}"
|
731 |
+
except Exception as e:
|
732 |
+
logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter API call: {e}", exc_info=True)
|
733 |
+
return None, f"Error: Unexpected issue with the fallback AI service. Details: {e}"
|
734 |
|
735 |
|
736 |
async def generate_summary(text: str, summary_type: str) -> str:
|
737 |
+
""" Generates a summary using the primary AI (Gemini) and falling back to OpenRouter. """
|
738 |
global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL
|
739 |
logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})")
|
740 |
+
final_summary: Optional[str] = None
|
741 |
+
error_message: Optional[str] = None # Accumulates errors
|
742 |
+
|
743 |
+
# --- Attempt Primary AI (Gemini) ---
|
744 |
if _gemini_primary_enabled:
|
745 |
logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})")
|
746 |
+
primary_summary, primary_error = await _call_gemini(text, summary_type)
|
747 |
+
if primary_summary:
|
748 |
+
logger.info(f"[Summary Generation] Success with primary AI (Gemini).")
|
749 |
+
return primary_summary # Return successful primary summary immediately
|
750 |
+
else:
|
751 |
+
logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {primary_error}. Proceeding to fallback.")
|
752 |
+
error_message = f"Primary AI ({GEMINI_MODEL}) failed: {primary_error}" # Store primary error
|
753 |
else:
|
754 |
logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback.")
|
755 |
error_message = "Primary AI (Gemini) unavailable."
|
756 |
|
757 |
+
# --- Attempt Fallback AI (OpenRouter) ---
|
758 |
if _openrouter_fallback_enabled:
|
759 |
logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})")
|
760 |
fallback_summary, fallback_error = await _call_openrouter(text, summary_type)
|
761 |
+
if fallback_summary:
|
762 |
+
logger.info(f"[Summary Generation] Success with fallback AI (OpenRouter).")
|
763 |
+
return fallback_summary # Return successful fallback summary
|
764 |
else:
|
765 |
logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error}")
|
766 |
+
# Combine errors for final message
|
767 |
+
if error_message: # If primary also failed
|
768 |
+
return f"{error_message}\nFallback AI ({OPENROUTER_MODEL}) also failed: {fallback_error}"
|
769 |
+
else: # Should not happen if logic is correct, but fallback just in case
|
770 |
+
return f"Fallback AI ({OPENROUTER_MODEL}) failed: {fallback_error}"
|
771 |
else:
|
772 |
logger.error("[Summary Generation] Fallback AI (OpenRouter) is disabled. Cannot proceed.")
|
773 |
+
if error_message: # Primary failed AND fallback disabled
|
774 |
+
return f"{error_message}\nFallback AI is also unavailable."
|
775 |
+
else: # Primary disabled AND fallback disabled
|
776 |
+
return "Error: Both primary and fallback AI services are unavailable."
|
777 |
|
778 |
+
# This part should ideally not be reached if the logic above is sound
|
779 |
+
logger.error("[Summary Generation] Reached end of function unexpectedly. No summary generated.")
|
780 |
+
final_error = error_message or "Unknown summary generation error."
|
781 |
+
return f"Sorry, an error occurred: {final_error}"
|
782 |
|
783 |
|
784 |
# --- Main Processing Task ---
|
|
|
820 |
message_id=status_message_id,
|
821 |
text=processing_message_text,
|
822 |
parse_mode=ParseMode.HTML, # Use HTML for escaped URL
|
823 |
+
reply_markup=None,
|
824 |
+
link_preview_options={'is_disabled': True} # Disable preview here too
|
825 |
)
|
826 |
logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
|
827 |
except Exception as e:
|
|
|
834 |
bot.send_message,
|
835 |
chat_id=chat_id,
|
836 |
text=processing_message_text,
|
837 |
+
parse_mode=ParseMode.HTML, # Use HTML for escaped URL
|
838 |
+
link_preview_options={'is_disabled': True}
|
839 |
)
|
840 |
if status_message:
|
841 |
message_to_delete_later_id = status_message.message_id
|
842 |
logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
|
843 |
else:
|
|
|
844 |
raise RuntimeError("Failed to send status message after retries.")
|
845 |
except Exception as e:
|
846 |
logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}")
|
|
|
847 |
user_feedback_message = "Sorry, there was an issue starting the process."
|
848 |
# Attempt to send final feedback later if possible
|
849 |
|
|
|
873 |
logger.warning(f"[Task {task_id}] Crawl4AI failed for {url}. Attempting BeautifulSoup (Fallback 1)...")
|
874 |
try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
875 |
except Exception: pass
|
876 |
+
content = await get_website_content_bs4(url)
|
877 |
|
878 |
if not content:
|
879 |
logger.warning(f"[Task {task_id}] BeautifulSoup also failed for {url}. Attempting API (Fallback 2)...")
|
|
|
883 |
except Exception: pass
|
884 |
content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
|
885 |
if not content:
|
886 |
+
# Check if the specific error was insufficient credits
|
887 |
+
# Note: get_website_content_via_api already logs the specific error
|
888 |
+
logger.error(f"[Task {task_id}] API fallback (urltotext) also failed for {url}.")
|
889 |
+
user_feedback_message = "Sorry, I couldn't fetch content from that website using any method (Crawl4AI/BS4 failed, API failed or ran out of credits)." # Updated message
|
890 |
else:
|
891 |
logger.warning(f"[Task {task_id}] API fallback is disabled. Cannot attempt Fallback 2.")
|
892 |
+
user_feedback_message = "Sorry, I couldn't fetch content from that website using Crawl4AI or BeautifulSoup, and the API fallback is not enabled." # Updated message
|
893 |
+
|
894 |
+
# Final check if all web methods failed
|
895 |
+
if not content and not user_feedback_message:
|
896 |
+
logger.error(f"[Task {task_id}] All website fetching methods seem to have failed without setting a specific user message.")
|
897 |
+
user_feedback_message = "Sorry, I couldn't fetch content from that website using any available method (blocked/inaccessible/empty?)."
|
898 |
|
899 |
|
900 |
# --- Generate Summary if Content was Fetched ---
|
|
|
910 |
logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
|
911 |
else:
|
912 |
# Success - Send the summary
|
|
|
913 |
summary_parts = []
|
914 |
current_part = ""
|
915 |
+
# Split respecting newlines, ensure no part exceeds MAX_SUMMARY_CHUNK_SIZE
|
916 |
+
lines = final_summary.splitlines(keepends=True)
|
917 |
+
for line in lines:
|
918 |
+
# If adding the next line exceeds the limit, finalize the current part
|
919 |
+
if len(current_part) + len(line) > MAX_SUMMARY_CHUNK_SIZE:
|
920 |
+
if current_part.strip(): # Don't add empty parts
|
921 |
+
summary_parts.append(current_part.strip())
|
922 |
+
current_part = line # Start new part with the current line
|
923 |
+
# If a single line itself is too long, truncate it (edge case)
|
924 |
+
if len(current_part) > MAX_SUMMARY_CHUNK_SIZE:
|
925 |
+
logger.warning(f"[Task {task_id}] Truncating overly long line in summary.")
|
926 |
+
current_part = current_part[:MAX_SUMMARY_CHUNK_SIZE]
|
927 |
else:
|
928 |
+
current_part += line
|
929 |
+
|
930 |
+
# Add the last part if it has content
|
931 |
+
if current_part.strip():
|
932 |
summary_parts.append(current_part.strip())
|
933 |
|
934 |
+
# If somehow splitting resulted in nothing (e.g., empty summary initially?)
|
935 |
+
if not summary_parts:
|
936 |
+
summary_parts.append("Summary generated, but it appears to be empty.")
|
937 |
+
logger.warning(f"[Task {task_id}] Summary was non-empty initially but splitting resulted in zero parts.")
|
938 |
+
|
939 |
|
940 |
+
logger.info(f"[Task {task_id}] Summary generated (orig len: {len(final_summary)}). Sending in {len(summary_parts)} part(s).")
|
941 |
+
|
942 |
+
# Determine the target message ID for the *first* part
|
943 |
+
# Prefer editing the "Processing..." message if we sent a new one
|
944 |
+
edit_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
|
945 |
message_sent = False
|
946 |
+
|
947 |
+
if edit_target_id:
|
948 |
try:
|
949 |
# Try editing the status message first
|
950 |
await retry_bot_operation(
|
951 |
bot.edit_message_text,
|
952 |
chat_id=chat_id,
|
953 |
+
message_id=edit_target_id,
|
954 |
text=summary_parts[0],
|
955 |
parse_mode=None, # Send as plain text initially, safer
|
956 |
link_preview_options={'is_disabled': True}
|
957 |
)
|
958 |
+
logger.debug(f"[Task {task_id}] Edited message {edit_target_id} with first summary part.")
|
959 |
+
# Prevent this message from being deleted later if it was the 'Processing...' one
|
960 |
+
if message_to_delete_later_id == edit_target_id: message_to_delete_later_id = None
|
961 |
+
# If it was the *original* button message that we are editing, keep status_message_id
|
962 |
+
# so we know *not* to delete it in finally block if it's the only message left.
|
963 |
+
# However, it's clearer to just prevent deletion if edited.
|
964 |
+
if status_message_id == edit_target_id: status_message_id = None # Mark as handled
|
965 |
+
|
966 |
message_sent = True
|
967 |
except Exception as edit_err:
|
968 |
+
logger.warning(f"[Task {task_id}] Failed to edit message {edit_target_id} with summary: {edit_err}. Sending new message instead.")
|
969 |
# If edit fails, fall through to send a new message
|
970 |
|
971 |
if not message_sent:
|
972 |
+
# Send the first part as a new message
|
973 |
+
sent_msg = await retry_bot_operation(
|
974 |
bot.send_message,
|
975 |
chat_id=chat_id,
|
976 |
text=summary_parts[0],
|
977 |
parse_mode=None,
|
978 |
link_preview_options={'is_disabled': True}
|
979 |
+
)
|
980 |
+
if sent_msg:
|
981 |
+
logger.debug(f"[Task {task_id}] Sent first summary part as new message {sent_msg.message_id}.")
|
982 |
+
else: # retry_bot_operation should raise on failure, but log defensively if it returns nothing
|
983 |
+
logger.error(f"[Task {task_id}] Failed to send first summary part even as new message.")
|
984 |
+
user_feedback_message = "Sorry, failed to send the summary." # Set error
|
985 |
+
|
986 |
+
|
987 |
+
# Send remaining parts (if any and first part succeeded)
|
988 |
+
if not user_feedback_message and len(summary_parts) > 1:
|
989 |
+
for i, part in enumerate(summary_parts[1:], start=2):
|
990 |
+
await asyncio.sleep(0.5) # Small delay between parts to avoid hitting Telegram rate limits
|
991 |
+
try:
|
992 |
+
await retry_bot_operation(
|
993 |
+
bot.send_message,
|
994 |
+
chat_id=chat_id,
|
995 |
+
text=part,
|
996 |
+
parse_mode=None,
|
997 |
+
link_preview_options={'is_disabled': True}
|
998 |
+
)
|
999 |
+
logger.debug(f"[Task {task_id}] Sent summary part {i}/{len(summary_parts)}.")
|
1000 |
+
except Exception as part_err:
|
1001 |
+
logger.error(f"[Task {task_id}] Failed to send summary part {i}: {part_err}")
|
1002 |
+
user_feedback_message = f"Sorry, failed to send part {i} of the summary."
|
1003 |
+
# Stop sending the remaining parts; continuing after a failed chunk would leave a gap in the summary.
|
1004 |
+
break # Stop sending remaining parts
|
1005 |
+
|
1006 |
+
# Determine overall success based on whether feedback message is set
|
1007 |
+
if not user_feedback_message:
|
1008 |
+
success = True
|
1009 |
+
# user_feedback_message = None # Clear feedback message ONLY on full success
|
1010 |
|
1011 |
# --- Handle Cases Where No Content Was Fetched or Summary Failed ---
|
1012 |
+
if user_feedback_message: # Check if any error occurred
|
1013 |
+
logger.warning(f"[Task {task_id}] Sending failure/error feedback to user: {user_feedback_message}")
|
1014 |
try:
|
1015 |
# Try editing the status message first
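# Mirror the success path: edit the existing status message where possible so the user sees a single final message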
|
1016 |
feedback_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
|
|
|
1026 |
reply_markup=None # Remove buttons
|
1027 |
)
|
1028 |
logger.debug(f"[Task {task_id}] Edited message {feedback_target_id} with failure feedback.")
|
1029 |
+
# Prevent deletion if edited
|
1030 |
if message_to_delete_later_id == feedback_target_id: message_to_delete_later_id = None
|
1031 |
+
if status_message_id == feedback_target_id: status_message_id = None
|
1032 |
message_sent = True
|
1033 |
except Exception as edit_err:
|
1034 |
logger.warning(f"[Task {task_id}] Failed to edit message {feedback_target_id} with failure feedback: {edit_err}. Sending new message instead.")
|
|
|
1048 |
# Catch-all for unexpected errors during the main processing logic
|
1049 |
logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
|
1050 |
user_feedback_message = "Oops! Something went wrong while processing your request. Please try again later."
|
1051 |
+
if bot: # Ensure bot exists before trying to send
|
1052 |
+
try:
|
1053 |
+
# Attempt to send a final error message
|
1054 |
+
await retry_bot_operation(
|
1055 |
+
bot.send_message,
|
1056 |
+
chat_id=chat_id,
|
1057 |
+
text=user_feedback_message
|
1058 |
+
)
|
1059 |
+
except Exception as final_err:
|
1060 |
+
logger.error(f"[Task {task_id}] Failed to send the final unexpected error feedback: {final_err}")
|
1061 |
|
1062 |
finally:
|
1063 |
# --- Cleanup ---
|
1064 |
+
# Delete the temporary "Processing..." message if it exists and wasn't edited/handled
|
1065 |
+
if message_to_delete_later_id and bot:
|
|
|
1066 |
try:
|
1067 |
+
await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=message_to_delete_later_id)
|
1068 |
+
logger.debug(f"[Task {task_id}] Deleted temporary status message {message_to_delete_later_id}")
|
1069 |
except Exception as del_e:
|
1070 |
+
logger.warning(f"[Task {task_id}] Failed to delete temporary status message {message_to_delete_later_id}: {del_e}")
|
1071 |
+
|
1072 |
+
# Deliberately do NOT delete the original button message (status_message_id) here.
|
1073 |
+
# If it was edited with the final summary or an error, the code above already set it to None.
|
1074 |
+
# If it still holds an ID, editing failed and a new message was sent instead;
|
1075 |
+
# in that case the original is left alone rather than removing the user's context.
|
1076 |
+
# Deleting message_to_delete_later_id above covers the main cleanup case.
|
1079 |
|
1080 |
# Close the background bot's HTTP client
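# ('_client' is an internal attribute of the request backend, hence the defensive hasattr check before closing)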
|
1081 |
if background_request and hasattr(background_request, '_client') and background_request._client:
|
|
|
1090 |
|
1091 |
# --- Telegram Handlers ---
|
1092 |
# (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler)
|
1093 |
+
# These remain largely the same.
|
|
|
1094 |
|
1095 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
1096 |
# ... (Keep existing implementation) ...
|
|
|
1104 |
user = update.effective_user
|
1105 |
if not user or not update.message: return
|
1106 |
logger.info(f"User {user.id} ({user.username or 'N/A'}) used /help.")
|
1107 |
+
# Updated help text slightly
|
1108 |
help_text = ( "📖 **How to use:**\n\n"
|
1109 |
"1. Send me any YouTube video link or website URL.\n"
|
1110 |
"2. I'll ask how you want it summarised (paragraph or points).\n"
|
1111 |
"3. Click the button for your choice.\n"
|
1112 |
"4. Wait for the summary!\n\n"
|
1113 |
"βοΈ **Behind the scenes:**\n"
|
1114 |
+
f"β’ **Websites:** I try `Crawl4AI` (smart crawl), then `BeautifulSoup` (basic scrape), and `urltotext.com` API (if configured & credits available).\n"
|
1115 |
+
"β’ **YouTube:** I use `youtube-transcript-api` first, then fall back to `Supadata` and `Apify` APIs if needed.\n"
|
1116 |
+
f"β’ **Summaries:** Generated using Google `{GEMINI_MODEL}` (primary) or `{OPENROUTER_MODEL}` (fallback, if configured).\n\n"
|
1117 |
"**Commands:**\n"
|
1118 |
"`/start` - Display welcome message\n"
|
1119 |
"`/help` - Show this help message" )
|
|
|
1126 |
if not user: return
|
1127 |
# Basic URL validation
|
1128 |
if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
|
|
|
1129 |
logger.debug(f"Ignoring non-URL from {user.id}: {url}")
|
1130 |
# Reply to the user that this doesn't look like a valid URL
|
1131 |
+
await update.message.reply_text("Hmm, that doesn't look like a valid web URL. Please make sure it starts with `http://` or `https://`.", parse_mode=ParseMode.MARKDOWN)
|
1132 |
return
|
1133 |
logger.info(f"User {user.id} ({user.username or 'N/A'}) sent potential URL: {url}")
|
1134 |
# Store URL and original message ID in user_data
|
|
|
1145 |
if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
|
1146 |
user = query.from_user; summary_type = query.data; query_id = query.id
|
1147 |
try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id} ({user.username or 'N/A'})")
|
1148 |
+
except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True) # Log but continue
|
1149 |
|
1150 |
url = context.user_data.get('url_to_summarize')
|
1151 |
message_id_to_edit = query.message.message_id # The message with the buttons
|
|
|
1154 |
if not url:
|
1155 |
logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Expired?")
|
1156 |
try:
|
|
|
1157 |
await query.edit_message_text(text="Sorry, I seem to have lost the context for that link. π€ Please send the URL again.", reply_markup=None)
|
1158 |
except BadRequest as e:
|
1159 |
if "message is not modified" in str(e).lower(): pass # Ignore if text is the same
|
1160 |
else: logger.error(f"Failed edit 'URL not found' msg: {e}")
|
1161 |
+
except Exception as e: logger.error(f"Failed edit 'URL not found' msg: {e}")
|
1162 |
+
return # Do not proceed further
|
|
|
|
|
1163 |
|
1164 |
+
# Clear context *only after* successfully scheduling the task below
|
1165 |
+
# context.user_data.pop('url_to_summarize', None) # Moved clearing
|
1166 |
+
# context.user_data.pop('original_message_id', None) # Moved clearing
|
|
|
1167 |
|
1168 |
# Check necessary configurations before scheduling
|
1169 |
global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
|
1170 |
if not TELEGRAM_TOKEN:
|
1171 |
logger.critical("TG TOKEN missing! Cannot schedule task.")
|
1172 |
+
try: await query.edit_message_text(text="❌ Bot configuration error (Token Missing). Cannot proceed.", reply_markup=None)
|
1173 |
except Exception: pass
|
1174 |
return
|
1175 |
if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
|
1176 |
logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid! Cannot summarize.")
|
1177 |
+
try: await query.edit_message_text(text="❌ AI configuration error: No summarization models are available. Cannot proceed.", reply_markup=None)
|
1178 |
except Exception: pass
|
1179 |
return
|
1180 |
# Log warnings if one model is missing, but proceed if at least one is available
|
|
|
1190 |
message_id_to_edit=message_id_to_edit, # Pass the ID of the message with buttons
|
1191 |
url=url,
|
1192 |
summary_type=summary_type,
|
1193 |
+
bot_token=TELEGRAM_TOKEN # Pass token explicitly
|
1194 |
),
|
1195 |
name=f"SummaryTask-{user.id}-{message_id_to_edit}"
|
1196 |
)
|
1197 |
|
1198 |
+
# Clear context AFTER scheduling the task to prevent race conditions if user clicks fast
|
1199 |
context.user_data.pop('url_to_summarize', None)
|
1200 |
context.user_data.pop('original_message_id', None)
|
1201 |
logger.debug(f"Cleared URL context for user {user.id} after scheduling task.")
|
1202 |
|
1203 |
+
# Optionally edit the button message *immediately* to give feedback before the task edits it again
|
1204 |
+
# This prevents the user clicking again while the task starts up.
|
1205 |
+
# try:
|
1206 |
+
# await query.edit_message_text(text=f"Okay, starting '{summary_type}' summary...", reply_markup=None)
|
1207 |
+
# except Exception as e:
|
1208 |
+
# logger.warning(f"Could not edit button message immediately after scheduling: {e}")
|
1209 |
+
# Left disabled: the task's "Processing..." message would overwrite this edit almost immediately.
|
1210 |
+
|
1211 |
+
|
1212 |
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
|
1213 |
# ... (Keep existing implementation) ...
|
1214 |
+
# Consider adding specific TelegramError types if needed
|
1215 |
+
ignore_errors = (AttributeError, BadRequest, TimedOut, NetworkError, RetryAfter) # Includes common transient Telegram/network errors
|
1216 |
if isinstance(context.error, ignore_errors):
|
1217 |
+
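# Substrings of benign Telegram errors (duplicate edits, expired callback queries, blocked bot, etc.)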
ignore_messages = ["message is not modified", "query is too old", "message to edit not found", "chat not found", "bot was blocked by the user"]
|
1218 |
+
err_str = str(context.error).lower()
|
1219 |
+
if any(msg in err_str for msg in ignore_messages) or isinstance(context.error, (TimedOut, NetworkError, RetryAfter)):
|
1220 |
+
logger.warning(f"Ignoring known/handled/transient error in error_handler: {context.error}")
|
1221 |
return
|
1222 |
logger.error("Exception while handling an update:", exc_info=context.error)
|
1223 |
# Consider notifying the user about unexpected errors if appropriate and possible
|
|
|
1237 |
# Add Handlers
|
1238 |
application.add_handler(CommandHandler("start", start))
|
1239 |
application.add_handler(CommandHandler("help", help_command))
|
1240 |
+
# Use a slightly broader filter to catch URLs even without explicit entity type from Telegram
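# The Regex branch also catches plain-text URLs that Telegram did not tag as url/text_link entities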
|
1241 |
+
url_filter = filters.TEXT & ~filters.COMMAND & (filters.Entity("url") | filters.Entity("text_link") | filters.Regex(r'https?://[^\s]+'))
|
1242 |
+
application.add_handler(MessageHandler(url_filter, handle_potential_url))
|
1243 |
application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
|
1244 |
# Error Handler
|
1245 |
application.add_error_handler(error_handler)
|
|
|
1308 |
logger.error(f"FATAL: Failed to set webhook: {e}", exc_info=True)
|
1309 |
raise RuntimeError(f"Failed to set webhook: {e}") from e
|
1310 |
else:
|
1311 |
+
# The public URL could in principle be recovered from request headers in some environments,
|
1312 |
+
# but that is less reliable than SPACE_HOST, so a missing SPACE_HOST is treated as fatal.
|
1313 |
+
logger.warning("SPACE_HOST environment variable not found. Webhook URL cannot be determined reliably for setup.")
|
1314 |
+
# Raise here; falling back to polling mode would be the alternative if no webhook URL is available.
|
1315 |
+
raise RuntimeError("Webhook URL undetermined (SPACE_HOST missing).")
|
1316 |
+
|
1317 |
|
1318 |
logger.info("ASGI Lifespan: Startup complete.");
|
1319 |
yield # Application runs here
|
|
|
1348 |
|
1349 |
|
1350 |
async def health_check(request: Request) -> PlainTextResponse:
|
1351 |
+
# ... (Keep existing implementation, updated with model names) ...
|
1352 |
+
global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _urltotext_fallback_enabled, SUPADATA_API_KEY
|
1353 |
bot_status = "Not Initialized"
|
1354 |
bot_username = "N/A"
|
1355 |
+
if ptb_app and ptb_app.bot and ptb_app.initialized: # Check if initialized
|
1356 |
try:
|
1357 |
+
# Check the webhook info first; it can be a more reliable liveness signal than get_me() alone
|
1358 |
+
wh_info = await ptb_app.bot.get_webhook_info()
|
1359 |
+
if ptb_app.running and wh_info and wh_info.url:
|
1360 |
bot_info = await ptb_app.bot.get_me()
|
1361 |
bot_username = f"@{bot_info.username}"
|
1362 |
+
bot_status = f"Running (Webhook OK, {bot_username})"
|
1363 |
+
elif ptb_app.running:
|
1364 |
+
bot_status = "Running (Webhook check failed or not set)"
|
1365 |
else: bot_status = "Initialized/Not running"
|
1366 |
except Exception as e: bot_status = f"Error checking status: {e}"
|
1367 |
+
elif ptb_app:
|
1368 |
+
bot_status = "Initializing..."
|
1369 |
+
|
1370 |
|
1371 |
health_info = [
|
1372 |
+
f"=== Telegram Summary Bot Status ===",
|
1373 |
+
f"Bot Application: {bot_status}",
|
1374 |
+
"--- Services ---",
|
1375 |
+
f"Primary Web Scraper: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'DISABLED (Lib Missing)'}",
|
1376 |
f"Fallback Web Scraper 1: BeautifulSoup",
|
1377 |
+
f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_enabled else 'DISABLED (No Key)'}",
|
1378 |
+
f"Primary Summarizer: {'Gemini (' + GEMINI_MODEL + ')' if _gemini_primary_enabled else 'DISABLED (No Key/Lib)'}",
|
1379 |
+
f"Fallback Summarizer: {'OpenRouter (' + OPENROUTER_MODEL + ')' if _openrouter_fallback_enabled else 'DISABLED (No Key)'}",
|
1380 |
f"Primary YT Transcript: youtube-transcript-api",
|
1381 |
+
f"Fallback YT Transcript 1: {'Supadata API' if SUPADATA_API_KEY else 'DISABLED (No Key)'}",
|
1382 |
+
f"Fallback YT Transcript 2: {'Apify (' + APIFY_ACTOR_ID + ')' if _apify_token_exists else 'DISABLED (No Key)'}"
|
1383 |
]
|
1384 |
return PlainTextResponse("\n".join(health_info))
|
1385 |
|
|
|
1440 |
log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
|
1441 |
# Use the PORT env var for local running too, defaulting to 8080
|
1442 |
local_port = int(os.environ.get('PORT', 8080))
|
1443 |
+
|
1444 |
+
# Make sure necessary env vars are loaded for local dev if not set system-wide
|
1445 |
+
# Example using python-dotenv if you add it to requirements-dev.txt
|
1446 |
+
# from dotenv import load_dotenv
|
1447 |
+
# load_dotenv()
|
1448 |
+
# logger.info("Loaded environment variables from .env file for local development.")
|
1449 |
+
|
1450 |
+
# Re-check required tokens after potential .env load
|
1451 |
+
if not get_secret('TELEGRAM_TOKEN'): logger.critical("Local Dev: TELEGRAM_TOKEN not found.")
|
1452 |
+
if not get_secret('GEMINI_API_KEY'): logger.error("Local Dev: GEMINI_API_KEY not found.")
|
1453 |
+
# Add checks for other keys as needed for local testing
|
1454 |
+
|
1455 |
uvicorn.run(
|
1456 |
"main:app",
|
1457 |
host='0.0.0.0',
|