fmab777 committed on
Commit d6e3c43 · verified
1 Parent(s): 4302db4

Update main.py

Files changed (1)
  1. main.py +492 -164
main.py CHANGED
@@ -53,12 +53,13 @@ except ImportError:
53
  # --- Google Gemini ---
54
  try:
55
  import google.generativeai as genai
56
- from google.generativeai.types import HarmCategory, HarmBlockThreshold
57
  _gemini_available = True
58
  except ImportError:
59
  genai = None
60
  HarmCategory = None
61
  HarmBlockThreshold = None
 
62
  _gemini_available = False
63
  # logger defined later
64
 
@@ -111,7 +112,8 @@ GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Primary Summarizer
111
  # Models (User can still configure via env vars)
112
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
113
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
114
- GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary Model - Updated May 2024
 
115
 
116
  # --- Configuration Checks ---
117
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
@@ -155,6 +157,10 @@ if _gemini_primary_enabled:
155
  logger.error(f"Failed to configure Google GenAI client: {e}")
156
  _gemini_primary_enabled = False
157
 
158
  # --- Retry Decorator ---
159
  # (Remains the same)
160
  @retry(
@@ -333,12 +339,37 @@ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
333
  return None
334
 
335
  logger.info(f"[Crawl4AI Primary] Attempting to crawl URL: {url}")
336
  try:
337
- # Use AsyncWebCrawler context manager for proper resource handling
338
- async with AsyncWebCrawler() as crawler:
339
  # Use arun for a single URL crawl
340
- # We primarily want the Markdown output as it's designed for LLMs
341
- # Add a reasonable timeout
342
  result = await crawler.arun(url=url, crawler_strategy="playwright", timeout=90) # 90 sec timeout
343
 
344
  if result and result.markdown:
@@ -363,9 +394,15 @@ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
363
  except asyncio.TimeoutError:
364
  logger.error(f"[Crawl4AI Primary] Timeout occurred while crawling {url}")
365
  return None
366
  except Exception as e:
367
- logger.error(f"[Crawl4AI Primary] Unexpected error during crawl for {url}: {e}", exc_info=True)
368
- # Log specific crawl4ai errors if they become apparent
369
  return None
370
 
371
 
@@ -381,8 +418,8 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[
381
  response.raise_for_status()
382
  content_type = response.headers.get('content-type', '').lower()
383
  if 'html' not in content_type: logger.warning(f"[Web Scrape BS4] Non-HTML content type from {url}: {content_type}"); return None
384
- try: return response.text
385
- except Exception as e: logger.error(f"[Web Scrape BS4] Error decoding response for {url}: {e}"); return None
386
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape BS4] HTTP error {e.response.status_code} fetching {url}: {e}")
387
  except httpx.TimeoutException: logger.error(f"[Web Scrape BS4] Timeout error fetching {url}")
388
  except httpx.TooManyRedirects: logger.error(f"[Web Scrape BS4] Too many redirects fetching {url}")
@@ -392,6 +429,7 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[
392
 
393
  async def get_website_content_bs4(url: str) -> Optional[str]:
394
  """Fetches and parses website content using BeautifulSoup (Fallback 1)."""
 
395
  if not url: logger.error("[BS4 Fallback] get_website_content_bs4: No URL"); return None
396
  logger.info(f"[BS4 Fallback] Attempting basic fetch & parse for: {url}")
397
  html_content = await fetch_url_content_for_scrape(url)
@@ -404,20 +442,26 @@ async def get_website_content_bs4(url: str) -> Optional[str]:
404
  def parse_html(content):
405
  soup = BeautifulSoup(content, DEFAULT_PARSER)
406
  # Remove common non-content elements
407
- for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]):
408
  element.extract()
409
- # Try to find main content areas
410
- main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
411
- target_element = main_content if main_content else soup.body
412
- if not target_element:
413
- logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}")
414
- return None
415
- # Extract text, clean up whitespace
416
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
417
- text = " ".join(lines)
418
- if not text:
419
- logger.warning(f"[BS4 Fallback] Extracted text is empty after cleaning for {url}")
420
- return None
 
 
421
  return text
422
 
423
  text_content = await asyncio.to_thread(parse_html, html_content)
@@ -434,7 +478,7 @@ async def get_website_content_bs4(url: str) -> Optional[str]:
434
  # Fallback 2: urltotext.com API
435
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
436
  """Fetches website content using urltotext.com API (Fallback 2)."""
437
- # ... (Keep existing implementation, maybe adjust log prefix) ...
438
  if not url: logger.error("[API Fallback] No URL"); return None
439
  if not api_key: logger.error("[API Fallback] urltotext.com API key missing."); return None
440
  logger.info(f"[API Fallback] Attempting fetch for: {url} using urltotext.com API")
@@ -455,83 +499,286 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
455
  else: logger.warning(f"[API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
456
  except json.JSONDecodeError: logger.error(f"[API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
457
  except Exception as e: logger.error(f"[API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
458
- elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
 
 
459
  else: logger.error(f"[API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
460
  except httpx.TimeoutException: logger.error(f"[API Fallback] Timeout connecting to urltotext.com API for {url}"); return None
461
  except httpx.RequestError as e: logger.error(f"[API Fallback] Request error connecting to urltotext.com API for {url}: {e}"); return None
462
  except Exception as e: logger.error(f"[API Fallback] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
463
 
464
  # --- Summarization Functions ---
465
- # (_call_gemini, _call_openrouter, generate_summary remain the same)
466
  async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
467
- # ... (Keep existing implementation) ...
468
  global GEMINI_MODEL, _gemini_primary_enabled
469
  if not _gemini_primary_enabled:
470
  logger.error("[Gemini Primary] Called but is disabled.");
471
  return None, "Error: Primary AI service (Gemini) not configured/available."
472
  logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
473
- # Define prompts (Keep existing prompts)
474
- if summary_type == "paragraph": prompt = ("...") # Your existing paragraph prompt
475
- else: prompt = ("...") # Your existing points prompt
476
- # ... (rest of the Gemini call logic remains the same) ...
477
- # Including length check, safety settings, API call, response handling
478
 
479
- # --- TEMPORARY PLACEHOLDER for brevity ---
480
- # Your actual Gemini call logic here... this is just a stub
481
- # Make sure to handle response parsing and errors correctly as before
482
- # --- END TEMPORARY PLACEHOLDER ---
483
- return "Gemini summary placeholder", None # Replace with actual implementation
484
 
485
 
486
  async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
487
- # ... (Keep existing implementation) ...
488
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
489
  if not _openrouter_fallback_enabled:
490
  logger.error("[OpenRouter Fallback] Called but is disabled.");
491
  return None, "Error: Fallback AI service (OpenRouter) not configured/available."
492
  logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
493
- # Define prompts (Keep existing prompts)
494
- if summary_type == "paragraph": prompt = ("...") # Your existing paragraph prompt
495
- else: prompt = ("...") # Your existing points prompt
496
- # ... (rest of the OpenRouter call logic remains the same) ...
497
- # Including length check, headers, payload, API call, response handling
498
 
499
- # --- TEMPORARY PLACEHOLDER for brevity ---
500
- # Your actual OpenRouter call logic here... this is just a stub
501
- # Make sure to handle response parsing and errors correctly as before
502
- # --- END TEMPORARY PLACEHOLDER ---
503
- return "OpenRouter summary placeholder", None # Replace with actual implementation
504
 
505
 
506
  async def generate_summary(text: str, summary_type: str) -> str:
507
- # ... (Keep existing implementation - calls _call_gemini then _call_openrouter) ...
508
  global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL
509
  logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})")
510
- final_summary: Optional[str] = None; error_message: Optional[str] = None
511
  if _gemini_primary_enabled:
512
  logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})")
513
- final_summary, error_message = await _call_gemini(text, summary_type)
514
- if final_summary: logger.info(f"[Summary Generation] Success with primary AI (Gemini)."); return final_summary
515
- else: logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {error_message}. Proceeding to fallback.")
516
  else:
517
  logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback.")
518
  error_message = "Primary AI (Gemini) unavailable."
519
 
 
520
  if _openrouter_fallback_enabled:
521
  logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})")
522
  fallback_summary, fallback_error = await _call_openrouter(text, summary_type)
523
- if fallback_summary: logger.info(f"[Summary Generation] Success with fallback AI (OpenRouter)."); return fallback_summary
 
 
524
  else:
525
  logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error}")
526
- if error_message: return f"{error_message} Fallback AI ({OPENROUTER_MODEL}) also failed: {fallback_error}"
527
- else: return f"Fallback AI ({OPENROUTER_MODEL}) failed: {fallback_error}"
528
  else:
529
  logger.error("[Summary Generation] Fallback AI (OpenRouter) is disabled. Cannot proceed.")
530
- if error_message: return f"{error_message} Fallback AI is also unavailable."
531
- else: return "Error: Both primary and fallback AI services are unavailable."
 
 
532
 
533
- logger.error("[Summary Generation] Reached end of function unexpectedly.")
534
- return "Sorry, an unknown error occurred during summary generation."
 
 
535
 
536
 
537
  # --- Main Processing Task ---
@@ -573,7 +820,8 @@ async def process_summary_task(
573
  message_id=status_message_id,
574
  text=processing_message_text,
575
  parse_mode=ParseMode.HTML, # Use HTML for escaped URL
576
- reply_markup=None
 
577
  )
578
  logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
579
  except Exception as e:
@@ -586,17 +834,16 @@ async def process_summary_task(
586
  bot.send_message,
587
  chat_id=chat_id,
588
  text=processing_message_text,
589
- parse_mode=ParseMode.HTML # Use HTML for escaped URL
 
590
  )
591
  if status_message:
592
  message_to_delete_later_id = status_message.message_id
593
  logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
594
  else:
595
- # This should ideally be caught by retry_bot_operation raising an error
596
  raise RuntimeError("Failed to send status message after retries.")
597
  except Exception as e:
598
  logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}")
599
- # Don't raise here, try to continue if possible, but log critical failure
600
  user_feedback_message = "Sorry, there was an issue starting the process."
601
  # Attempt to send final feedback later if possible
602
 
@@ -626,7 +873,7 @@ async def process_summary_task(
626
  logger.warning(f"[Task {task_id}] Crawl4AI failed for {url}. Attempting BeautifulSoup (Fallback 1)...")
627
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
628
  except Exception: pass
629
- content = await get_website_content_bs4(url) # Use the renamed BS4 function
630
 
631
  if not content:
632
  logger.warning(f"[Task {task_id}] BeautifulSoup also failed for {url}. Attempting API (Fallback 2)...")
@@ -636,11 +883,18 @@ async def process_summary_task(
636
  except Exception: pass
637
  content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
638
  if not content:
639
- logger.error(f"[Task {task_id}] All website fetching methods failed for {url}.")
640
- user_feedback_message = "Sorry, I couldn't fetch content from that website using any method (blocked/inaccessible/empty?)."
 
 
641
  else:
642
  logger.warning(f"[Task {task_id}] API fallback is disabled. Cannot attempt Fallback 2.")
643
- user_feedback_message = "Sorry, I couldn't fetch content from that website using the primary or secondary methods, and the API fallback is not configured."
644
 
645
 
646
  # --- Generate Summary if Content was Fetched ---
@@ -656,73 +910,107 @@ async def process_summary_task(
656
  logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
657
  else:
658
  # Success - Send the summary
659
- max_length = 4096 # Telegram's message length limit
660
  summary_parts = []
661
  current_part = ""
662
- # Split respecting Markdown formatting (basic line breaks)
663
- for line in final_summary.splitlines(keepends=True):
664
- if len(current_part) + len(line) <= max_length:
665
- current_part += line
666
  else:
667
- summary_parts.append(current_part.strip())
668
- current_part = line
669
- if current_part.strip(): # Add the last part
 
670
  summary_parts.append(current_part.strip())
671
 
672
- logger.info(f"[Task {task_id}] Summary generated (len: {len(final_summary)}). Sending in {len(summary_parts)} part(s).")
673
 
674
- # Send first part (potentially replacing the "Processing" message if it was a new one)
675
- first_message_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
676
  message_sent = False
677
- if first_message_target_id:
 
678
  try:
679
  # Try editing the status message first
680
  await retry_bot_operation(
681
  bot.edit_message_text,
682
  chat_id=chat_id,
683
- message_id=first_message_target_id,
684
  text=summary_parts[0],
685
  parse_mode=None, # Send as plain text initially, safer
686
  link_preview_options={'is_disabled': True}
687
  )
688
- logger.debug(f"[Task {task_id}] Edited message {first_message_target_id} with first summary part.")
689
- # Prevent this message from being deleted later
690
- if message_to_delete_later_id == first_message_target_id: message_to_delete_later_id = None
691
- elif status_message_id == first_message_target_id: status_message_id = None
692
  message_sent = True
693
  except Exception as edit_err:
694
- logger.warning(f"[Task {task_id}] Failed to edit message {first_message_target_id} with summary: {edit_err}. Sending new message instead.")
695
  # If edit fails, fall through to send a new message
696
 
697
  if not message_sent:
698
- await retry_bot_operation(
 
699
  bot.send_message,
700
  chat_id=chat_id,
701
  text=summary_parts[0],
702
  parse_mode=None,
703
  link_preview_options={'is_disabled': True}
704
- )
705
- logger.debug(f"[Task {task_id}] Sent first summary part as new message.")
706
-
707
-
708
- # Send remaining parts
709
- for i, part in enumerate(summary_parts[1:], start=2):
710
- await asyncio.sleep(0.5) # Small delay between parts
711
- await retry_bot_operation(
712
- bot.send_message,
713
- chat_id=chat_id,
714
- text=part,
715
- parse_mode=None,
716
- link_preview_options={'is_disabled': True}
717
- )
718
- logger.debug(f"[Task {task_id}] Sent summary part {i}/{len(summary_parts)}.")
719
-
720
- success = True
721
- user_feedback_message = None # Clear feedback message on success
722
 
723
  # --- Handle Cases Where No Content Was Fetched or Summary Failed ---
724
- elif user_feedback_message:
725
- logger.warning(f"[Task {task_id}] Sending failure feedback to user: {user_feedback_message}")
726
  try:
727
  # Try editing the status message first
728
  feedback_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
@@ -738,8 +1026,9 @@ async def process_summary_task(
738
  reply_markup=None # Remove buttons
739
  )
740
  logger.debug(f"[Task {task_id}] Edited message {feedback_target_id} with failure feedback.")
 
741
  if message_to_delete_later_id == feedback_target_id: message_to_delete_later_id = None
742
- elif status_message_id == feedback_target_id: status_message_id = None
743
  message_sent = True
744
  except Exception as edit_err:
745
  logger.warning(f"[Task {task_id}] Failed to edit message {feedback_target_id} with failure feedback: {edit_err}. Sending new message instead.")
@@ -759,27 +1048,34 @@ async def process_summary_task(
759
  # Catch-all for unexpected errors during the main processing logic
760
  logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
761
  user_feedback_message = "Oops! Something went wrong while processing your request. Please try again later."
762
- try:
763
- # Attempt to send a final error message
764
- await retry_bot_operation(
765
- bot.send_message,
766
- chat_id=chat_id,
767
- text=user_feedback_message
768
- )
769
- except Exception as final_err:
770
- logger.error(f"[Task {task_id}] Failed to send the final unexpected error feedback: {final_err}")
 
771
 
772
  finally:
773
  # --- Cleanup ---
774
- # Delete the "Processing..." or original button message if it wasn't edited/replaced
775
- delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
776
- if delete_target_id and bot:
777
  try:
778
- await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id)
779
- logger.debug(f"[Task {task_id}] Deleted original status/button message {delete_target_id}")
780
  except Exception as del_e:
781
- # Log as warning, not critical if deletion fails
782
- logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
783
 
784
  # Close the background bot's HTTP client
785
  if background_request and hasattr(background_request, '_client') and background_request._client:
@@ -794,8 +1090,7 @@ async def process_summary_task(
794
 
795
  # --- Telegram Handlers ---
796
  # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler)
797
- # These functions remain largely the same, only minor logging/config checks might be adjusted if needed.
798
- # The core logic change is within process_summary_task.
799
 
800
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
801
  # ... (Keep existing implementation) ...
@@ -809,15 +1104,16 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> No
809
  user = update.effective_user
810
  if not user or not update.message: return
811
  logger.info(f"User {user.id} ({user.username or 'N/A'}) used /help.")
 
812
  help_text = ( "🔍 **How to use:**\n\n"
813
  "1. Send me any YouTube video link or website URL.\n"
814
  "2. I'll ask how you want it summarised (paragraph or points).\n"
815
  "3. Click the button for your choice.\n"
816
  "4. Wait for the summary!\n\n"
817
  "⚙️ **Behind the scenes:**\n"
818
- "• **Websites:** I'll first try a smart crawl (`Crawl4AI`), then a basic scrape (`BeautifulSoup`), and finally an external API (`urltotext.com`) if needed.\n"
819
- "• **YouTube:** I use `youtube-transcript-api` first, then fall back to `Supadata` and `Apify` APIs if transcripts aren't found directly.\n"
820
- "• **Summaries:** Generated using Google `Gemini` (primary) or `OpenRouter` models (fallback).\n\n"
821
  "**Commands:**\n"
822
  "`/start` - Display welcome message\n"
823
  "`/help` - Show this help message" )
@@ -830,10 +1126,9 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
830
  if not user: return
831
  # Basic URL validation
832
  if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
833
- # Maybe add a reply here? "Please send a valid URL starting with http:// or https://"
834
  logger.debug(f"Ignoring non-URL from {user.id}: {url}")
835
  # Optionally reply to the user that it doesn't look like a valid URL
836
- # await update.message.reply_text("Hmm, that doesn't look like a valid web URL. Please make sure it starts with `http://` or `https://`.", parse_mode=ParseMode.MARKDOWN)
837
  return
838
  logger.info(f"User {user.id} ({user.username or 'N/A'}) sent potential URL: {url}")
839
  # Store URL and original message ID in user_data
@@ -850,7 +1145,7 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
850
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
851
  user = query.from_user; summary_type = query.data; query_id = query.id
852
  try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id} ({user.username or 'N/A'})")
853
- except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
854
 
855
  url = context.user_data.get('url_to_summarize')
856
  message_id_to_edit = query.message.message_id # The message with the buttons
@@ -859,31 +1154,27 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
859
  if not url:
860
  logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Expired?")
861
  try:
862
- # Edit the message where the button was clicked
863
  await query.edit_message_text(text="Sorry, I seem to have lost the context for that link. 🤔 Please send the URL again.", reply_markup=None)
864
  except BadRequest as e:
865
  if "message is not modified" in str(e).lower(): pass # Ignore if text is the same
866
  else: logger.error(f"Failed edit 'URL not found' msg: {e}")
867
- except Exception as e:
868
- logger.error(f"Failed edit 'URL not found' msg: {e}")
869
- # Do not proceed further
870
- return
871
 
872
- # Clear context *after* successfully retrieving URL and scheduling task
873
- # context.user_data.pop('url_to_summarize', None)
874
- # context.user_data.pop('original_message_id', None)
875
- # logger.debug(f"Cleared URL context for user {user.id}") # Moved clearing to after task creation
876
 
877
  # Check necessary configurations before scheduling
878
  global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
879
  if not TELEGRAM_TOKEN:
880
  logger.critical("TG TOKEN missing! Cannot schedule task.")
881
- try: await query.edit_message_text(text="❌ Bot configuration error (Token Missing). Cannot proceed.")
882
  except Exception: pass
883
  return
884
  if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
885
  logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid! Cannot summarize.")
886
- try: await query.edit_message_text(text="❌ AI configuration error: No summarization models are available. Cannot proceed.")
887
  except Exception: pass
888
  return
889
  # Log warnings if one model is missing, but proceed if at least one is available
@@ -899,23 +1190,34 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
899
  message_id_to_edit=message_id_to_edit, # Pass the ID of the message with buttons
900
  url=url,
901
  summary_type=summary_type,
902
- bot_token=TELEGRAM_TOKEN
903
  ),
904
  name=f"SummaryTask-{user.id}-{message_id_to_edit}"
905
  )
906
 
907
- # Clear context AFTER scheduling the task to prevent race conditions
908
  context.user_data.pop('url_to_summarize', None)
909
  context.user_data.pop('original_message_id', None)
910
  logger.debug(f"Cleared URL context for user {user.id} after scheduling task.")
911
 
912
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
913
  # ... (Keep existing implementation) ...
914
- ignore_errors = (AttributeError, BadRequest, ) # Add BadRequest to ignored types if retry handles it
 
915
  if isinstance(context.error, ignore_errors):
916
- ignore_messages = ["message is not modified", "query is too old", "message to edit not found"]
917
- if any(msg in str(context.error).lower() for msg in ignore_messages):
918
- logger.debug(f"Ignoring known/handled error in error_handler: {context.error}")
 
919
  return
920
  logger.error("Exception while handling an update:", exc_info=context.error)
921
  # Consider notifying the user about unexpected errors if appropriate and possible
@@ -935,7 +1237,9 @@ async def setup_bot_config() -> Application:
935
  # Add Handlers
936
  application.add_handler(CommandHandler("start", start))
937
  application.add_handler(CommandHandler("help", help_command))
938
- application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND & filters.Entity("url") | filters.Entity("text_link"), handle_potential_url)) # More specific filter
 
 
939
  application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
940
  # Error Handler
941
  application.add_error_handler(error_handler)
@@ -1004,8 +1308,12 @@ async def lifespan(app: Starlette):
1004
  logger.error(f"FATAL: Failed to set webhook: {e}", exc_info=True)
1005
  raise RuntimeError(f"Failed to set webhook: {e}") from e
1006
  else:
1007
- logger.critical("Could not construct webhook URL (SPACE_HOST env var might be missing or invalid).")
1008
- raise RuntimeError("Webhook URL undetermined.")
1009
 
1010
  logger.info("ASGI Lifespan: Startup complete.");
1011
  yield # Application runs here
@@ -1040,30 +1348,38 @@ async def lifespan(app: Starlette):
1040
 
1041
 
1042
  async def health_check(request: Request) -> PlainTextResponse:
1043
- # ... (Keep existing implementation, maybe add crawl4ai status?) ...
1044
- global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _urltotext_fallback_enabled
1045
  bot_status = "Not Initialized"
1046
  bot_username = "N/A"
1047
- if ptb_app and ptb_app.bot:
1048
  try:
1049
- if ptb_app.running:
1050
- # Cache bot_info slightly? Or fetch on demand.
 
1051
  bot_info = await ptb_app.bot.get_me()
1052
  bot_username = f"@{bot_info.username}"
1053
- bot_status = f"Running ({bot_username})"
 
 
1054
  else: bot_status = "Initialized/Not running"
1055
  except Exception as e: bot_status = f"Error checking status: {e}"
1056
 
1057
  health_info = [
1058
- f"TG Bot Summariser - Status: {bot_status}",
1059
- f"Primary Web Scraper: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'N/A (Disabled)'}",
 
 
1060
  f"Fallback Web Scraper 1: BeautifulSoup",
1061
- f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_enabled else 'N/A (Disabled)'}",
1062
- f"Primary Summarizer: {'Gemini (' + GEMINI_MODEL + ')' if _gemini_primary_enabled else 'N/A (Disabled)'}",
1063
- f"Fallback Summarizer: {'OpenRouter (' + OPENROUTER_MODEL + ')' if _openrouter_fallback_enabled else 'N/A (Disabled)'}",
1064
  f"Primary YT Transcript: youtube-transcript-api",
1065
- f"Fallback YT Transcript 1: {'Supadata API' if SUPADATA_API_KEY else 'N/A (Disabled)'}",
1066
- f"Fallback YT Transcript 2: {'Apify (' + APIFY_ACTOR_ID + ')' if _apify_token_exists else 'N/A (No Token)'}"
1067
  ]
1068
  return PlainTextResponse("\n".join(health_info))
1069
 
@@ -1124,6 +1440,18 @@ if __name__ == '__main__':
1124
  log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
1125
  # Use the PORT env var for local running too, defaulting to 8080
1126
  local_port = int(os.environ.get('PORT', 8080))
1127
  uvicorn.run(
1128
  "main:app",
1129
  host='0.0.0.0',
 
53
  # --- Google Gemini ---
54
  try:
55
  import google.generativeai as genai
56
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold, GenerateContentResponse
57
  _gemini_available = True
58
  except ImportError:
59
  genai = None
60
  HarmCategory = None
61
  HarmBlockThreshold = None
62
+ GenerateContentResponse = None # Add this for type hinting if needed
63
  _gemini_available = False
64
  # logger defined later
65
 
 
112
  # Models (User can still configure via env vars)
113
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
114
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
115
+ GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash-latest") # Use the 1.5 flash model directly
116
+ # Using gemini-1.5-flash-latest is generally recommended over gemini-2.0-flash-001
117
 
118
  # --- Configuration Checks ---
119
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
 
157
  logger.error(f"Failed to configure Google GenAI client: {e}")
158
  _gemini_primary_enabled = False
159
 
160
+ # --- Constants ---
161
+ MAX_SUMMARY_CHUNK_SIZE = 4000 # Max characters per Telegram message (allow buffer)
162
+ MAX_INPUT_TOKEN_APPROX = 1000000 # Gemini 1.5 Flash context window (approx chars) - adjust if needed
163
+
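A note on units: MAX_SUMMARY_CHUNK_SIZE above is a character limit (Telegram's message cap), while Gemini's context window is measured in tokens, so MAX_INPUT_TOKEN_APPROX is a character budget standing in for a token budget. A minimal sketch of the relationship, assuming the common and only approximate figure of ~4 characters per token (not part of this commit):

def approx_token_count(text: str, chars_per_token: float = 4.0) -> int:
    """Very rough token estimate: ~4 characters per token for English text."""
    return int(len(text) / chars_per_token)

def within_gemini_budget(text: str, max_chars: int = 1_000_000) -> bool:
    """Mirrors the char-based guard used before calling the model:
    1,000,000 characters is roughly 250,000 tokens by this estimate."""
    return len(text) <= max_chars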
164
  # --- Retry Decorator ---
165
  # (Remains the same)
166
  @retry(
 
339
  return None
340
 
341
  logger.info(f"[Crawl4AI Primary] Attempting to crawl URL: {url}")
342
+ # Define a writable cache directory (use /tmp in container environments)
343
+ # Create the directory path beforehand to avoid potential race conditions or permission issues within the library
344
+ cache_dir_path = "/tmp/.crawl4ai" # CHANGED: Use /tmp
345
+ try:
346
+ os.makedirs(cache_dir_path, exist_ok=True)
347
+ logger.info(f"[Crawl4AI Primary] Ensured cache directory exists: {cache_dir_path}")
348
+ except OSError as e:
349
+ logger.error(f"[Crawl4AI Primary] Failed to create cache directory {cache_dir_path}: {e}. Crawl may fail.")
350
+ # Don't return here, let the crawler try anyway, it might handle it internally or use default
351
+ except Exception as e:
352
+ logger.error(f"[Crawl4AI Primary] Unexpected error creating cache directory {cache_dir_path}: {e}")
353
+
354
+
355
  try:
356
+ # Use AsyncWebCrawler context manager with explicit cache_dir
357
+ # NOTE: Pass cache_dir here if the library supports it via __init__ or a config object
358
+ # Checking crawl4ai docs/source, AsyncWebCrawler doesn't directly take cache_dir in __init__.
359
+ # It seems to rely on environment variables or default home resolution.
360
+ # The PermissionError happens in RobotsParser -> get_home_folder -> os.makedirs.
361
+ # WORKAROUND: We might need to adjust the environment or hope setting HOME=/app in Dockerfile is enough
362
+ # *if* the library correctly uses HOME. Let's test *without* explicit cache_dir first,
363
+ # relying on HOME=/app and the prior os.makedirs call. If it still fails, we need a different approach.
364
+
365
+ # UPDATE: The traceback shows it uses utils.get_home_folder(). Let's stick with HOME=/app for now
366
+ # and see if the permission error was transient or specific to the '.models' subdir.
367
+ # If it persists, we might need to fork/modify crawl4ai or find another way to configure its paths.
368
+
369
+ # Let's *try* passing cache_dir anyway, maybe it's an undocumented/newer feature
370
+ async with AsyncWebCrawler(cache_dir=cache_dir_path) as crawler: # TRY passing cache_dir
371
+ logger.info(f"[Crawl4AI Primary] Initialized with explicit cache_dir: {cache_dir_path}")
372
  # Use arun for a single URL crawl
 
 
373
  result = await crawler.arun(url=url, crawler_strategy="playwright", timeout=90) # 90 sec timeout
374
 
375
  if result and result.markdown:
 
394
  except asyncio.TimeoutError:
395
  logger.error(f"[Crawl4AI Primary] Timeout occurred while crawling {url}")
396
  return None
397
+ except PermissionError as e: # Catch the specific error
398
+ logger.error(f"[Crawl4AI Primary] Permission denied during crawl for {url}. Likely filesystem issue in container. Error: {e}", exc_info=True)
399
+ return None # Fail gracefully for this method
400
  except Exception as e:
401
+ # Log type error if cache_dir isn't accepted
402
+ if "unexpected keyword argument 'cache_dir'" in str(e):
403
+ logger.error(f"[Crawl4AI Primary] AsyncWebCrawler does not accept 'cache_dir'. Remove this argument. Error: {e}")
404
+ else:
405
+ logger.error(f"[Crawl4AI Primary] Unexpected error during crawl for {url}: {e}", exc_info=True)
406
  return None
407
 
408
 
 
418
  response.raise_for_status()
419
  content_type = response.headers.get('content-type', '').lower()
420
  if 'html' not in content_type: logger.warning(f"[Web Scrape BS4] Non-HTML content type from {url}: {content_type}"); return None
421
+ try: return response.text # Use response.text to let httpx handle decoding
422
+ except Exception as e: logger.error(f"[Web Scrape BS4] Error getting response text for {url}: {e}"); return None
423
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape BS4] HTTP error {e.response.status_code} fetching {url}: {e}")
424
  except httpx.TimeoutException: logger.error(f"[Web Scrape BS4] Timeout error fetching {url}")
425
  except httpx.TooManyRedirects: logger.error(f"[Web Scrape BS4] Too many redirects fetching {url}")
 
429
 
430
  async def get_website_content_bs4(url: str) -> Optional[str]:
431
  """Fetches and parses website content using BeautifulSoup (Fallback 1)."""
432
+ # ... (Keep existing implementation) ...
433
  if not url: logger.error("[BS4 Fallback] get_website_content_bs4: No URL"); return None
434
  logger.info(f"[BS4 Fallback] Attempting basic fetch & parse for: {url}")
435
  html_content = await fetch_url_content_for_scrape(url)
 
442
  def parse_html(content):
443
  soup = BeautifulSoup(content, DEFAULT_PARSER)
444
  # Remove common non-content elements
445
+ for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio"]):
446
  element.extract()
447
+ # Try to find main content areas more broadly
448
+ selectors = ['main', 'article', '[role="main"]', '#content', '.content', '#main-content', '.main-content', '#body', '.body', '#article-body', '.article-body']
449
+ target_element = None
450
+ for selector in selectors:
451
+ target_element = soup.select_one(selector)
452
+ if target_element: break
453
+
454
+ if not target_element: target_element = soup.body # Fallback to body
455
+ if not target_element: logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}"); return None
456
+
457
+ # Extract text, clean up whitespace aggressively
458
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
459
+ text = " ".join(lines) # Join lines with spaces
460
+
461
+ # Basic post-cleaning
462
+ text = re.sub(r'\s{2,}', ' ', text).strip() # Replace multiple spaces with single space
463
+
464
+ if not text: logger.warning(f"[BS4 Fallback] Extracted text is empty after cleaning for {url}"); return None
465
  return text
466
 
467
  text_content = await asyncio.to_thread(parse_html, html_content)
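For reference, the selector cascade in parse_html behaves like this on a small page. A standalone illustration (not code from this commit) using the stdlib "html.parser" in place of DEFAULT_PARSER:

from bs4 import BeautifulSoup

sample_html = """
<html><body>
  <nav>Menu</nav>
  <article><p>Actual story text.</p></article>
  <footer>Contact</footer>
</body></html>
"""

soup = BeautifulSoup(sample_html, "html.parser")
for tag in soup(["nav", "footer"]):  # strip non-content elements, as above
    tag.extract()

selectors = ['main', 'article', '[role="main"]', '#content', '.content']
target = None
for sel in selectors:
    target = soup.select_one(sel)
    if target:
        break
if target is None:
    target = soup.body

print(" ".join(target.get_text(separator="\n", strip=True).split()))
# -> Actual story text.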
 
478
  # Fallback 2: urltotext.com API
479
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
480
  """Fetches website content using urltotext.com API (Fallback 2)."""
481
+ # ... (Keep existing implementation) ...
482
  if not url: logger.error("[API Fallback] No URL"); return None
483
  if not api_key: logger.error("[API Fallback] urltotext.com API key missing."); return None
484
  logger.info(f"[API Fallback] Attempting fetch for: {url} using urltotext.com API")
 
499
  else: logger.warning(f"[API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
500
  except json.JSONDecodeError: logger.error(f"[API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
501
  except Exception as e: logger.error(f"[API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
502
+ elif response.status_code == 402: # Specifically handle insufficient credits
503
+ logger.error(f"[API Fallback] Error 402 (Insufficient Credits) from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
504
+ elif response.status_code in [400, 401, 403, 422, 500]: logger.error(f"[API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
505
  else: logger.error(f"[API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
506
  except httpx.TimeoutException: logger.error(f"[API Fallback] Timeout connecting to urltotext.com API for {url}"); return None
507
  except httpx.RequestError as e: logger.error(f"[API Fallback] Request error connecting to urltotext.com API for {url}: {e}"); return None
508
  except Exception as e: logger.error(f"[API Fallback] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
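As a side note on the status handling above, the per-code branches could be collapsed into a lookup. A small sketch, not from the commit; apart from 402, which the change explicitly treats as insufficient credits, the descriptions are generic HTTP semantics rather than documented urltotext.com behaviour:

_API_FAILURE_REASONS = {
    402: "insufficient credits",  # the case the handler above now singles out
    400: "bad request",
    401: "unauthorised (check API key)",
    403: "forbidden",
    422: "unprocessable URL",
    500: "server error",
}

def describe_api_failure(status_code: int) -> str:
    """Human-readable reason for a non-200 urltotext.com response."""
    return _API_FAILURE_REASONS.get(status_code, f"unexpected status {status_code}")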
509
 
510
  # --- Summarization Functions ---
 
511
  async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
512
+ """ Calls the Google Gemini API to generate a summary. """
513
  global GEMINI_MODEL, _gemini_primary_enabled
514
  if not _gemini_primary_enabled:
515
  logger.error("[Gemini Primary] Called but is disabled.");
516
  return None, "Error: Primary AI service (Gemini) not configured/available."
517
+
518
+ # Truncate input text if it exceeds the approximate limit
519
+ if len(text) > MAX_INPUT_TOKEN_APPROX:
520
+ logger.warning(f"[Gemini Primary] Input text length ({len(text)}) exceeds limit ({MAX_INPUT_TOKEN_APPROX}). Truncating.")
521
+ text = text[:MAX_INPUT_TOKEN_APPROX]
522
+
523
  logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
 
 
 
 
 
524
 
525
+ # Define prompts
526
+ if summary_type == "paragraph":
527
+ prompt = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information. Avoid unnecessary jargon or overly complex sentences.
528
+
529
+ Text to summarise:
530
+ ---
531
+ {text}
532
+ ---
533
+
534
+ Concise Paragraph Summary:"""
535
+ elif summary_type == "points":
536
+ prompt = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea or important piece of information. Aim for clarity and conciseness.
537
+
538
+ Text to summarise:
539
+ ---
540
+ {text}
541
+ ---
542
+
543
+ Key Bullet Points Summary:"""
544
+ else:
545
+ logger.error(f"[Gemini Primary] Invalid summary_type: {summary_type}")
546
+ return None, f"Error: Invalid summary type '{summary_type}' specified."
547
+
548
+ # Configure safety settings (adjust as needed)
549
+ safety_settings = {
550
+ HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
551
+ HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
552
+ HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
553
+ HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
554
+ }
555
+
556
+ # Configure generation settings (optional)
557
+ generation_config = genai.types.GenerationConfig(
558
+ # candidate_count=1, # Default is 1
559
+ # stop_sequences=["\n"],
560
+ max_output_tokens=2048, # Increased max tokens for potentially longer summaries from large inputs
561
+ temperature=0.7, # Adjust creativity vs factualness
562
+ # top_p=1.0, # Default
563
+ # top_k=None # Default
564
+ )
565
+
566
+ try:
567
+ model = genai.GenerativeModel(GEMINI_MODEL)
568
+ logger.debug(f"[Gemini Primary] Sending request to model {GEMINI_MODEL}")
569
+ response: GenerateContentResponse = await model.generate_content_async( # Use async version
570
+ prompt,
571
+ generation_config=generation_config,
572
+ safety_settings=safety_settings
573
+ )
574
+ logger.debug(f"[Gemini Primary] Received response. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}")
575
+
576
+ # Check for safety blocks or other issues in response
577
+ if not response.candidates:
578
+ block_reason = response.prompt_feedback.block_reason if hasattr(response, 'prompt_feedback') else 'Unknown'
579
+ error_msg = f"Error: Gemini response blocked or empty. Reason: {block_reason}"
580
+ logger.error(f"[Gemini Primary] {error_msg}")
581
+ return None, error_msg
582
+
583
+ # Check finish reason (e.g., MAX_TOKENS, SAFETY)
584
+ finish_reason = response.candidates[0].finish_reason
585
+ if finish_reason != genai.types.FinishReason.STOP and finish_reason != genai.types.FinishReason.MAX_TOKENS:
586
+ # Log safety ratings if available
587
+ safety_ratings_str = "N/A"
588
+ if hasattr(response.candidates[0], 'safety_ratings'):
589
+ safety_ratings_str = ', '.join([f"{r.category.name}: {r.probability.name}" for r in response.candidates[0].safety_ratings])
590
+ error_msg = f"Error: Gemini generation finished unexpectedly. Reason: {finish_reason.name}. Safety: {safety_ratings_str}"
591
+ logger.error(f"[Gemini Primary] {error_msg}")
592
+ # Return partial text if available and finish reason is MAX_TOKENS? Maybe not, could be truncated badly.
593
+ # If SAFETY, definitely return error.
594
+ if finish_reason == genai.types.FinishReason.SAFETY:
595
+ return None, error_msg # Return specific error for safety blocks
596
+ # For other reasons, maybe return partial, but safer to return error for now
597
+ # return response.text if hasattr(response, 'text') else None, error_msg # Optional: return partial text for RECITATION/OTHER
598
+ return None, f"Error: Gemini generation finished unexpectedly ({finish_reason.name})."
599
+
600
+
601
+ # Extract text
602
+ summary_text = response.text
603
+ if not summary_text or not summary_text.strip():
604
+ logger.warning("[Gemini Primary] Gemini returned an empty summary.")
605
+ return None, "Error: AI generated an empty summary."
606
+
607
+ logger.info(f"[Gemini Primary] Summary generated successfully (len: {len(summary_text)}).")
608
+ return summary_text.strip(), None
609
+
610
+ except Exception as e:
611
+ logger.error(f"[Gemini Primary] Error during API call to {GEMINI_MODEL}: {e}", exc_info=True)
612
+ # Check for specific Google API errors if needed
613
+ # from google.api_core import exceptions as google_exceptions
614
+ # if isinstance(e, google_exceptions.GoogleAPIError): ...
615
+ return None, f"Error: Failed to communicate with the primary AI service (Gemini). Details: {e}"
616
 
617
 
618
  async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
619
+ """ Calls the OpenRouter API to generate a summary. """
620
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
621
  if not _openrouter_fallback_enabled:
622
  logger.error("[OpenRouter Fallback] Called but is disabled.");
623
  return None, "Error: Fallback AI service (OpenRouter) not configured/available."
624
+
625
+ # OpenRouter models might have smaller context windows, truncate more aggressively if needed
626
+ # Example: 32k tokens ~ 120k chars. Deepseek is large though. Check model specifics if issues arise.
627
+ max_input_len_openrouter = 100000 # Adjust based on OPENROUTER_MODEL limits if known
628
+ if len(text) > max_input_len_openrouter:
629
+ logger.warning(f"[OpenRouter Fallback] Input text length ({len(text)}) exceeds approx limit ({max_input_len_openrouter}) for {OPENROUTER_MODEL}. Truncating.")
630
+ text = text[:max_input_len_openrouter]
631
+
632
  logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
 
 
 
 
 
633
 
634
+ # Define prompts (similar structure to Gemini)
635
+ if summary_type == "paragraph":
636
+ prompt_content = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information.
637
+
638
+ Text:
639
+ ---
640
+ {text}
641
+ ---
642
+
643
+ Concise Paragraph Summary:"""
644
+ elif summary_type == "points":
645
+ prompt_content = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea.
646
+
647
+ Text:
648
+ ---
649
+ {text}
650
+ ---
651
+
652
+ Key Bullet Points Summary:"""
653
+ else:
654
+ logger.error(f"[OpenRouter Fallback] Invalid summary_type: {summary_type}")
655
+ return None, f"Error: Invalid summary type '{summary_type}' specified."
656
+
657
+ headers = {
658
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
659
+ "Content-Type": "application/json",
660
+ "HTTP-Referer": "https://github.com/fmab777/telegram-summary-bot", # Optional: Identify your app
661
+ "X-Title": "Telegram Summary Bot", # Optional: Identify your app
662
+ }
663
+ payload = {
664
+ "model": OPENROUTER_MODEL,
665
+ "messages": [
666
+ {"role": "system", "content": "You are an expert summarizer. Provide summaries as requested."},
667
+ {"role": "user", "content": prompt_content}
668
+ ],
669
+ "max_tokens": 2048, # Adjust as needed
670
+ "temperature": 0.7,
671
+ }
672
+
673
+ api_url = "https://openrouter.ai/api/v1/chat/completions"
674
+
675
+ try:
676
+ async with httpx.AsyncClient(timeout=120.0) as client: # Longer timeout for potentially slower models
677
+ logger.debug(f"[OpenRouter Fallback] Sending request to {api_url} for model {OPENROUTER_MODEL}")
678
+ response = await client.post(api_url, headers=headers, json=payload)
679
+ logger.debug(f"[OpenRouter Fallback] Received status code {response.status_code}")
680
+
681
+ if response.status_code == 200:
682
+ try:
683
+ data = response.json()
684
+ if data.get("choices") and len(data["choices"]) > 0:
685
+ message = data["choices"][0].get("message")
686
+ if message and message.get("content"):
687
+ summary_text = message["content"].strip()
688
+ if summary_text:
689
+ finish_reason = data["choices"][0].get("finish_reason", "N/A")
690
+ logger.info(f"[OpenRouter Fallback] Summary generated successfully (len: {len(summary_text)}). Finish: {finish_reason}")
691
+ # Check for length finish reason?
692
+ if finish_reason == 'length':
693
+ logger.warning("[OpenRouter Fallback] Summary may be truncated due to max_tokens limit.")
694
+ return summary_text, None
695
+ else:
696
+ logger.warning("[OpenRouter Fallback] OpenRouter returned an empty summary content.")
697
+ return None, "Error: Fallback AI generated an empty summary."
698
+ else:
699
+ logger.error(f"[OpenRouter Fallback] Invalid response structure (missing message/content). Data: {data}")
700
+ return None, "Error: Fallback AI returned an invalid response format."
701
+ else:
702
+ logger.error(f"[OpenRouter Fallback] Invalid response structure (missing choices). Data: {data}")
703
+ # Check for error object in response
704
+ api_error = data.get("error", {}).get("message", "Unknown API error")
705
+ return None, f"Error: Fallback AI response missing summary. API msg: {api_error}"
706
+
707
+ except json.JSONDecodeError:
708
+ logger.error(f"[OpenRouter Fallback] Failed to decode JSON response. Status: {response.status_code}, Text: {response.text[:500]}")
709
+ return None, "Error: Fallback AI sent an invalid JSON response."
710
+ except Exception as e:
711
+ logger.error(f"[OpenRouter Fallback] Error processing success response: {e}", exc_info=True)
712
+ return None, f"Error: Failed to process Fallback AI response. Details: {e}"
713
+
714
+ else:
715
+ # Handle API errors (rate limits, auth, etc.)
716
+ error_message = f"Error: Fallback AI service ({OPENROUTER_MODEL}) returned status {response.status_code}."
717
+ try:
718
+ error_details = response.json().get("error", {}).get("message", response.text[:200])
719
+ error_message += f" Details: {error_details}"
720
+ except Exception:
721
+ error_message += f" Response: {response.text[:200]}"
722
+ logger.error(f"[OpenRouter Fallback] {error_message}")
723
+ return None, error_message
724
+
725
+ except httpx.TimeoutException:
726
+ logger.error(f"[OpenRouter Fallback] Timeout connecting to OpenRouter API for {OPENROUTER_MODEL}")
727
+ return None, "Error: Timed out connecting to the fallback AI service."
728
+ except httpx.RequestError as e:
729
+ logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}")
730
+ return None, f"Error: Network error connecting to the fallback AI service. Details: {e}"
731
+ except Exception as e:
732
+ logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter API call: {e}", exc_info=True)
733
+ return None, f"Error: Unexpected issue with the fallback AI service. Details: {e}"
734
 
735
 
736
  async def generate_summary(text: str, summary_type: str) -> str:
737
+ """ Generates a summary using the primary AI (Gemini) and falling back to OpenRouter. """
738
  global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL
739
  logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})")
740
+ final_summary: Optional[str] = None
741
+ error_message: Optional[str] = None # Accumulates errors
742
+
743
+ # --- Attempt Primary AI (Gemini) ---
744
  if _gemini_primary_enabled:
745
  logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})")
746
+ primary_summary, primary_error = await _call_gemini(text, summary_type)
747
+ if primary_summary:
748
+ logger.info(f"[Summary Generation] Success with primary AI (Gemini).")
749
+ return primary_summary # Return successful primary summary immediately
750
+ else:
751
+ logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {primary_error}. Proceeding to fallback.")
752
+ error_message = f"Primary AI ({GEMINI_MODEL}) failed: {primary_error}" # Store primary error
753
  else:
754
  logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback.")
755
  error_message = "Primary AI (Gemini) unavailable."
756
 
757
+ # --- Attempt Fallback AI (OpenRouter) ---
758
  if _openrouter_fallback_enabled:
759
  logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})")
760
  fallback_summary, fallback_error = await _call_openrouter(text, summary_type)
761
+ if fallback_summary:
762
+ logger.info(f"[Summary Generation] Success with fallback AI (OpenRouter).")
763
+ return fallback_summary # Return successful fallback summary
764
  else:
765
  logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error}")
766
+ # Combine errors for final message
767
+ if error_message: # If primary also failed
768
+ return f"{error_message}\nFallback AI ({OPENROUTER_MODEL}) also failed: {fallback_error}"
769
+ else: # Should not happen if logic is correct, but fallback just in case
770
+ return f"Fallback AI ({OPENROUTER_MODEL}) failed: {fallback_error}"
771
  else:
772
  logger.error("[Summary Generation] Fallback AI (OpenRouter) is disabled. Cannot proceed.")
773
+ if error_message: # Primary failed AND fallback disabled
774
+ return f"{error_message}\nFallback AI is also unavailable."
775
+ else: # Primary disabled AND fallback disabled
776
+ return "Error: Both primary and fallback AI services are unavailable."
777
 
778
+ # This part should ideally not be reached if the logic above is sound
779
+ logger.error("[Summary Generation] Reached end of function unexpectedly. No summary generated.")
780
+ final_error = error_message or "Unknown summary generation error."
781
+ return f"Sorry, an error occurred: {final_error}"
782
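The control flow added above is a standard primary-then-fallback cascade. A self-contained sketch of the same pattern with stubbed backends, for illustration only (the real code calls _call_gemini and _call_openrouter):

import asyncio
from typing import Awaitable, Callable, Optional, Tuple

Backend = Callable[[str, str], Awaitable[Tuple[Optional[str], Optional[str]]]]

async def summarize_with_fallback(text: str, summary_type: str,
                                  backends: list[tuple[str, Backend]]) -> str:
    """Try each backend in order; return the first summary, else the combined errors."""
    errors: list[str] = []
    for name, call in backends:
        summary, error = await call(text, summary_type)
        if summary:
            return summary
        errors.append(f"{name} failed: {error}")
    return "Error: " + " | ".join(errors or ["no AI backends configured"])

async def _always_fails(text, summary_type):
    return None, "stubbed failure"

async def _always_succeeds(text, summary_type):
    return f"[{summary_type}] summary of {len(text)} chars", None

print(asyncio.run(summarize_with_fallback(
    "some text", "paragraph",
    [("Gemini", _always_fails), ("OpenRouter", _always_succeeds)])))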
 
783
 
784
  # --- Main Processing Task ---
 
820
  message_id=status_message_id,
821
  text=processing_message_text,
822
  parse_mode=ParseMode.HTML, # Use HTML for escaped URL
823
+ reply_markup=None,
824
+ link_preview_options={'is_disabled': True} # Disable preview here too
825
  )
826
  logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
827
  except Exception as e:
 
834
  bot.send_message,
835
  chat_id=chat_id,
836
  text=processing_message_text,
837
+ parse_mode=ParseMode.HTML, # Use HTML for escaped URL
838
+ link_preview_options={'is_disabled': True}
839
  )
840
  if status_message:
841
  message_to_delete_later_id = status_message.message_id
842
  logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
843
  else:
 
844
  raise RuntimeError("Failed to send status message after retries.")
845
  except Exception as e:
846
  logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}")
 
847
  user_feedback_message = "Sorry, there was an issue starting the process."
848
  # Attempt to send final feedback later if possible
849
 
 
873
  logger.warning(f"[Task {task_id}] Crawl4AI failed for {url}. Attempting BeautifulSoup (Fallback 1)...")
874
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
875
  except Exception: pass
876
+ content = await get_website_content_bs4(url)
877
 
878
  if not content:
879
  logger.warning(f"[Task {task_id}] BeautifulSoup also failed for {url}. Attempting API (Fallback 2)...")
 
883
  except Exception: pass
884
  content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
885
  if not content:
886
+ # Check if the specific error was insufficient credits
887
+ # Note: get_website_content_via_api already logs the specific error
888
+ logger.error(f"[Task {task_id}] API fallback (urltotext) also failed for {url}.")
889
+ user_feedback_message = "Sorry, I couldn't fetch content from that website using any method (Crawl4AI/BS4 failed, API failed or ran out of credits)." # Updated message
890
  else:
891
  logger.warning(f"[Task {task_id}] API fallback is disabled. Cannot attempt Fallback 2.")
892
+ user_feedback_message = "Sorry, I couldn't fetch content from that website using Crawl4AI or BeautifulSoup, and the API fallback is not enabled." # Updated message
893
+
894
+ # Final check if all web methods failed
895
+ if not content and not user_feedback_message:
896
+ logger.error(f"[Task {task_id}] All website fetching methods seem to have failed without setting a specific user message.")
897
+ user_feedback_message = "Sorry, I couldn't fetch content from that website using any available method (blocked/inaccessible/empty?)."
898
 
899
 
900
  # --- Generate Summary if Content was Fetched ---
 
910
  logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
911
  else:
912
  # Success - Send the summary
 
913
 summary_parts = []
 current_part = ""
+ # Split respecting newlines, ensure no part exceeds MAX_SUMMARY_CHUNK_SIZE
+ lines = final_summary.splitlines(keepends=True)
+ for line in lines:
+ # If adding the next line exceeds the limit, finalize the current part
+ if len(current_part) + len(line) > MAX_SUMMARY_CHUNK_SIZE:
+ if current_part.strip(): # Don't add empty parts
+ summary_parts.append(current_part.strip())
+ current_part = line # Start new part with the current line
+ # If a single line itself is too long, truncate it (edge case)
+ if len(current_part) > MAX_SUMMARY_CHUNK_SIZE:
+ logger.warning(f"[Task {task_id}] Truncating overly long line in summary.")
+ current_part = current_part[:MAX_SUMMARY_CHUNK_SIZE]
 else:
+ current_part += line
+
+ # Add the last part if it has content
+ if current_part.strip():
 summary_parts.append(current_part.strip())
 
+ # If somehow splitting resulted in nothing (e.g., empty summary initially?)
+ if not summary_parts:
+ summary_parts.append("Summary generated, but it appears to be empty.")
+ logger.warning(f"[Task {task_id}] Summary was non-empty initially but splitting resulted in zero parts.")
+
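# Editor's note: illustrative sketch only, not part of this commit. It factors the
# line-preserving chunking above into a helper; the limit parameter plays the role of
# MAX_SUMMARY_CHUNK_SIZE used above (Telegram text messages are capped at 4096 characters).
def split_summary_into_parts(text: str, limit: int) -> list[str]:
    parts, current = [], ""
    for line in text.splitlines(keepends=True):
        if len(current) + len(line) > limit:
            if current.strip():
                parts.append(current.strip())
            current = line[:limit]  # truncate a pathological single line, as above
        else:
            current += line
    if current.strip():
        parts.append(current.strip())
    return parts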
 
+ logger.info(f"[Task {task_id}] Summary generated (orig len: {len(final_summary)}). Sending in {len(summary_parts)} part(s).")
+
+ # Determine the target message ID for the *first* part
+ # Prefer editing the "Processing..." message if we sent a new one
+ edit_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
 message_sent = False
+
+ if edit_target_id:
 try:
 # Try editing the status message first
 await retry_bot_operation(
 bot.edit_message_text,
 chat_id=chat_id,
+ message_id=edit_target_id,
 text=summary_parts[0],
 parse_mode=None, # Send as plain text initially, safer
 link_preview_options={'is_disabled': True}
 )
+ logger.debug(f"[Task {task_id}] Edited message {edit_target_id} with first summary part.")
+ # Prevent this message from being deleted later if it was the 'Processing...' one
+ if message_to_delete_later_id == edit_target_id: message_to_delete_later_id = None
+ # If we edited the *original* button message, clear status_message_id as well
+ # so the cleanup and error-feedback logic below treats it as already handled.
+ if status_message_id == edit_target_id: status_message_id = None # Mark as handled
+
 message_sent = True
 except Exception as edit_err:
+ logger.warning(f"[Task {task_id}] Failed to edit message {edit_target_id} with summary: {edit_err}. Sending new message instead.")
 # If edit fails, fall through to send a new message
 
 if not message_sent:
+ # Send the first part as a new message
+ sent_msg = await retry_bot_operation(
 bot.send_message,
 chat_id=chat_id,
 text=summary_parts[0],
 parse_mode=None,
 link_preview_options={'is_disabled': True}
+ )
+ if sent_msg:
+ logger.debug(f"[Task {task_id}] Sent first summary part as new message {sent_msg.message_id}.")
+ else: # Should be caught by retry, but log defensively
+ logger.error(f"[Task {task_id}] Failed to send first summary part even as new message.")
+ user_feedback_message = "Sorry, failed to send the summary." # Set error
+
+
+ # Send remaining parts (if any and first part succeeded)
+ if not user_feedback_message and len(summary_parts) > 1:
+ for i, part in enumerate(summary_parts[1:], start=2):
+ await asyncio.sleep(0.5) # Small delay between parts
+ try:
+ await retry_bot_operation(
+ bot.send_message,
+ chat_id=chat_id,
+ text=part,
+ parse_mode=None,
+ link_preview_options={'is_disabled': True}
+ )
+ logger.debug(f"[Task {task_id}] Sent summary part {i}/{len(summary_parts)}.")
+ except Exception as part_err:
+ logger.error(f"[Task {task_id}] Failed to send summary part {i}: {part_err}")
+ user_feedback_message = f"Sorry, failed to send part {i} of the summary."
+ # Stop sending the remaining parts after a failure
+ break
+
+ # Determine overall success based on whether feedback message is set
+ if not user_feedback_message:
+ success = True
+ # user_feedback_message = None # Clear feedback message ONLY on full success
 
 # --- Handle Cases Where No Content Was Fetched or Summary Failed ---
+ if user_feedback_message: # Check if any error occurred
+ logger.warning(f"[Task {task_id}] Sending failure/error feedback to user: {user_feedback_message}")
 try:
 # Try editing the status message first
 feedback_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
 
 reply_markup=None # Remove buttons
 )
 logger.debug(f"[Task {task_id}] Edited message {feedback_target_id} with failure feedback.")
+ # Prevent deletion if edited
 if message_to_delete_later_id == feedback_target_id: message_to_delete_later_id = None
+ if status_message_id == feedback_target_id: status_message_id = None
 message_sent = True
 except Exception as edit_err:
 logger.warning(f"[Task {task_id}] Failed to edit message {feedback_target_id} with failure feedback: {edit_err}. Sending new message instead.")
 
 # Catch-all for unexpected errors during the main processing logic
 logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
 user_feedback_message = "Oops! Something went wrong while processing your request. Please try again later."
+ if bot: # Ensure bot exists before trying to send
+ try:
+ # Attempt to send a final error message
+ await retry_bot_operation(
+ bot.send_message,
+ chat_id=chat_id,
+ text=user_feedback_message
+ )
+ except Exception as final_err:
+ logger.error(f"[Task {task_id}] Failed to send the final unexpected error feedback: {final_err}")
 
 finally:
 # --- Cleanup ---
+ # Delete the temporary "Processing..." message if it exists and wasn't edited/handled
+ if message_to_delete_later_id and bot:
 
 try:
+ await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=message_to_delete_later_id)
+ logger.debug(f"[Task {task_id}] Deleted temporary status message {message_to_delete_later_id}")
 except Exception as del_e:
+ logger.warning(f"[Task {task_id}] Failed to delete temporary status message {message_to_delete_later_id}: {del_e}")
+
+ # Explicitly DO NOT delete the original message with buttons (status_message_id)
+ # if it was successfully edited with the final result or error message.
+ # The logic above sets status_message_id = None if it was edited.
+ # If status_message_id still holds an ID here, editing failed and a *new* message was sent instead.
+ # In that failure case the original button message is left in place so the user keeps their context.
+ # Deleting message_to_delete_later_id above covers the main cleanup case.
 
 # Close the background bot's HTTP client
 if background_request and hasattr(background_request, '_client') and background_request._client:
 
 
 # --- Telegram Handlers ---
 # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler)
+ # These remain largely the same.
 
 
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 # ... (Keep existing implementation) ...
 
 user = update.effective_user
 if not user or not update.message: return
 logger.info(f"User {user.id} ({user.username or 'N/A'}) used /help.")
+ # Updated help text slightly
 help_text = ( "πŸ” **How to use:**\n\n"
 "1. Send me any YouTube video link or website URL.\n"
 "2. I'll ask how you want it summarised (paragraph or points).\n"
 "3. Click the button for your choice.\n"
 "4. Wait for the summary!\n\n"
 "βš™οΈ **Behind the scenes:**\n"
+ f"β€’ **Websites:** I try `Crawl4AI` (smart crawl), then `BeautifulSoup` (basic scrape), and `urltotext.com` API (if configured & credits available).\n"
+ "β€’ **YouTube:** I use `youtube-transcript-api` first, then fall back to `Supadata` and `Apify` APIs if needed.\n"
+ f"β€’ **Summaries:** Generated using Google `{GEMINI_MODEL}` (primary) or `{OPENROUTER_MODEL}` (fallback, if configured).\n\n"
 "**Commands:**\n"
 "`/start` - Display welcome message\n"
 "`/help` - Show this help message" )
 
 if not user: return
 # Basic URL validation
 if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
 
 logger.debug(f"Ignoring non-URL from {user.id}: {url}")
 # Optionally reply to the user that it doesn't look like a valid URL
+ await update.message.reply_text("Hmm, that doesn't look like a valid web URL. Please make sure it starts with `http://` or `https://`.", parse_mode=ParseMode.MARKDOWN)
 return
 logger.info(f"User {user.id} ({user.username or 'N/A'}) sent potential URL: {url}")
 # Store URL and original message ID in user_data
 
 if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
 user = query.from_user; summary_type = query.data; query_id = query.id
 try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id} ({user.username or 'N/A'})")
+ except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True) # Log but continue
 
 url = context.user_data.get('url_to_summarize')
 message_id_to_edit = query.message.message_id # The message with the buttons
 
 if not url:
 logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Expired?")
 try:
 await query.edit_message_text(text="Sorry, I seem to have lost the context for that link. πŸ€” Please send the URL again.", reply_markup=None)
 except BadRequest as e:
 if "message is not modified" in str(e).lower(): pass # Ignore if text is the same
 else: logger.error(f"Failed edit 'URL not found' msg: {e}")
+ except Exception as e: logger.error(f"Failed edit 'URL not found' msg: {e}")
+ return # Do not proceed further
 
 
+ # Clear context *only after* successfully scheduling the task below
+ # context.user_data.pop('url_to_summarize', None) # Moved clearing
+ # context.user_data.pop('original_message_id', None) # Moved clearing
 
 
 # Check necessary configurations before scheduling
 global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
 if not TELEGRAM_TOKEN:
 logger.critical("TG TOKEN missing! Cannot schedule task.")
+ try: await query.edit_message_text(text="❌ Bot configuration error (Token Missing). Cannot proceed.", reply_markup=None)
 except Exception: pass
 return
 if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
 logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid! Cannot summarize.")
+ try: await query.edit_message_text(text="❌ AI configuration error: No summarization models are available. Cannot proceed.", reply_markup=None)
 except Exception: pass
 return
 # Log warnings if one model is missing, but proceed if at least one is available
 
 message_id_to_edit=message_id_to_edit, # Pass the ID of the message with buttons
 url=url,
 summary_type=summary_type,
+ bot_token=TELEGRAM_TOKEN # Pass token explicitly
 ),
 name=f"SummaryTask-{user.id}-{message_id_to_edit}"
 )
 
+ # Clear context AFTER scheduling the task to prevent race conditions if the user clicks again quickly
 context.user_data.pop('url_to_summarize', None)
 context.user_data.pop('original_message_id', None)
 logger.debug(f"Cleared URL context for user {user.id} after scheduling task.")
 
+ # Optionally edit the button message *immediately* to give feedback before the task edits it again.
+ # This prevents the user clicking again while the task starts up.
+ # try:
+ # await query.edit_message_text(text=f"Okay, starting '{summary_type}' summary...", reply_markup=None)
+ # except Exception as e:
+ # logger.warning(f"Could not edit button message immediately after scheduling: {e}")
+ # Note: this initial edit would be quickly overwritten by the task's "Processing..." message.
+
+
 async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
 # ... (Keep existing implementation) ...
+ # Consider adding specific TelegramError types if needed
+ ignore_errors = (AttributeError, BadRequest, TimedOut, NetworkError, RetryAfter) # Add common transient errors
 if isinstance(context.error, ignore_errors):
+ ignore_messages = ["message is not modified", "query is too old", "message to edit not found", "chat not found", "bot was blocked by the user"]
+ err_str = str(context.error).lower()
+ if any(msg in err_str for msg in ignore_messages) or isinstance(context.error, (TimedOut, NetworkError, RetryAfter)):
+ logger.warning(f"Ignoring known/handled/transient error in error_handler: {context.error}")
 return
 logger.error("Exception while handling an update:", exc_info=context.error)
 # Consider notifying the user about unexpected errors if appropriate and possible
 
 # Add Handlers
 application.add_handler(CommandHandler("start", start))
 application.add_handler(CommandHandler("help", help_command))
+ # Use a slightly broader filter to catch URLs even without explicit entity type from Telegram
+ url_filter = filters.TEXT & ~filters.COMMAND & (filters.Entity("url") | filters.Entity("text_link") | filters.Regex(r'https?://[^\s]+'))
+ application.add_handler(MessageHandler(url_filter, handle_potential_url))
 application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
 # Error Handler
 application.add_error_handler(error_handler)
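# Editor's note: illustrative check only, not part of this commit. The Regex branch of
# url_filter above is what catches bare URLs that Telegram did not tag as an entity.
import re
_url_re = re.compile(r'https?://[^\s]+')
assert _url_re.search("have a look at https://example.com/article")
assert _url_re.search("no link in this message") is None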
 
 logger.error(f"FATAL: Failed to set webhook: {e}", exc_info=True)
 raise RuntimeError(f"Failed to set webhook: {e}") from e
 else:
+ # Request headers could in principle supply the public URL in some environments,
+ # but that is less reliable than SPACE_HOST, so no attempt is made here.
+ logger.warning("SPACE_HOST environment variable not found. Webhook URL cannot be determined reliably for setup.")
+ # Fail fast; alternatively the bot could fall back to polling mode when the webhook URL is unknown.
+ raise RuntimeError("Webhook URL undetermined (SPACE_HOST missing).")
+
 
 logger.info("ASGI Lifespan: Startup complete.");
 yield # Application runs here
 
 
 
 async def health_check(request: Request) -> PlainTextResponse:
+ # ... (Keep existing implementation, updated with model names) ...
+ global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _urltotext_fallback_enabled, SUPADATA_API_KEY
 bot_status = "Not Initialized"
 bot_username = "N/A"
+ if ptb_app and ptb_app.bot and ptb_app.initialized: # Check if initialized
 try:
+ # Quick webhook check; sometimes more reliable than get_me()
+ wh_info = await ptb_app.bot.get_webhook_info()
+ if ptb_app.running and wh_info and wh_info.url:
 bot_info = await ptb_app.bot.get_me()
 bot_username = f"@{bot_info.username}"
+ bot_status = f"Running (Webhook OK, {bot_username})"
+ elif ptb_app.running:
+ bot_status = "Running (Webhook check failed or not set)"
 else: bot_status = "Initialized/Not running"
 except Exception as e: bot_status = f"Error checking status: {e}"
+ elif ptb_app:
+ bot_status = "Initializing..."
+
 
 health_info = [
+ f"=== Telegram Summary Bot Status ===",
+ f"Bot Application: {bot_status}",
+ "--- Services ---",
+ f"Primary Web Scraper: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'DISABLED (Lib Missing)'}",
 f"Fallback Web Scraper 1: BeautifulSoup",
+ f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_enabled else 'DISABLED (No Key)'}",
+ f"Primary Summarizer: {'Gemini (' + GEMINI_MODEL + ')' if _gemini_primary_enabled else 'DISABLED (No Key/Lib)'}",
+ f"Fallback Summarizer: {'OpenRouter (' + OPENROUTER_MODEL + ')' if _openrouter_fallback_enabled else 'DISABLED (No Key)'}",
 f"Primary YT Transcript: youtube-transcript-api",
+ f"Fallback YT Transcript 1: {'Supadata API' if SUPADATA_API_KEY else 'DISABLED (No Key)'}",
+ f"Fallback YT Transcript 2: {'Apify (' + APIFY_ACTOR_ID + ')' if _apify_token_exists else 'DISABLED (No Key)'}"
 ]
 return PlainTextResponse("\n".join(health_info))
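# Editor's note: example response body for the endpoint above (illustrative only; the
# bot username and bracketed model/actor names are placeholders, actual values depend on configuration):
#   === Telegram Summary Bot Status ===
#   Bot Application: Running (Webhook OK, @YourSummaryBot)
#   --- Services ---
#   Primary Web Scraper: Crawl4AI
#   Fallback Web Scraper 1: BeautifulSoup
#   Fallback Web Scraper 2: urltotext.com API
#   Primary Summarizer: Gemini (<GEMINI_MODEL>)
#   Fallback Summarizer: OpenRouter (<OPENROUTER_MODEL>)
#   Primary YT Transcript: youtube-transcript-api
#   Fallback YT Transcript 1: Supadata API
#   Fallback YT Transcript 2: Apify (<APIFY_ACTOR_ID>)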
 
 
 log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
 # Use the PORT env var for local running too, defaulting to 8080
 local_port = int(os.environ.get('PORT', 8080))
+
+ # Make sure necessary env vars are loaded for local dev if not set system-wide
+ # Example using python-dotenv if you add it to requirements-dev.txt
+ # from dotenv import load_dotenv
+ # load_dotenv()
+ # logger.info("Loaded environment variables from .env file for local development.")
+
+ # Re-check required tokens after potential .env load
+ if not get_secret('TELEGRAM_TOKEN'): logger.critical("Local Dev: TELEGRAM_TOKEN not found.")
+ if not get_secret('GEMINI_API_KEY'): logger.error("Local Dev: GEMINI_API_KEY not found.")
+ # Add checks for other keys as needed for local testing
+
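# Editor's note: illustrative sketch only, not part of this commit. One guarded way to apply
# the python-dotenv idea mentioned above so production deployments are unaffected
# (assumes python-dotenv is installed for local development only).
from pathlib import Path
try:
    from dotenv import load_dotenv
    if Path(".env").exists():
        load_dotenv()
        logger.info("Loaded environment variables from .env for local development.")
except ImportError:
    pass  # python-dotenv not installed; rely on the real environment variables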
 uvicorn.run(
 "main:app",
 host='0.0.0.0',