fmab777 committed on
Commit
68d4bf1
·
verified ·
1 Parent(s): 300d904

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +430 -159
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Corrected SyntaxError at line 424 - Now with Gemini 2.0 as primary)
2
  import os
3
  import re
4
  import logging
@@ -52,6 +52,15 @@ except ImportError:
52
  _gemini_available = False
53
  # logger will be defined later, log warning after logger setup
54
 
 
 
 
 
 
 
 
 
 
55
 
56
  # --- Logging Setup ---
57
  logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
@@ -63,9 +72,14 @@ logging.getLogger('gunicorn.error').setLevel(logging.INFO)
63
  logging.getLogger('uvicorn').setLevel(logging.INFO)
64
  logging.getLogger('starlette').setLevel(logging.INFO)
65
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
 
 
 
 
66
  logger = logging.getLogger(__name__)
67
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
68
  if not _gemini_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.")
 
69
 
70
 
71
  # --- Global variable for PTB app ---
@@ -81,16 +95,16 @@ def get_secret(secret_name):
81
 
82
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
83
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Now Fallback
84
- URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
85
- SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
86
- APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
87
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
88
- GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Now Primary
89
 
90
  # Models (User can still configure via env vars)
91
- OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
92
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
93
- GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary Model
94
 
95
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
96
  if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.")
@@ -101,19 +115,29 @@ if not _gemini_available: logger.warning("⚠️ WARNING: google-generativeai li
101
  elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found or empty. Gemini disabled.")
102
 
103
  _openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
104
- if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback disabled.")
105
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
- if not URLTOTEXT_API_KEY: pass
108
- if not SUPADATA_API_KEY: pass
109
- if not APIFY_API_TOKEN: pass
110
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
111
 
112
  logger.info("Secret loading and configuration check finished.")
113
- logger.info(f"Using Gemini Model (Primary): {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}")
114
- logger.info(f"Using OpenRouter Model (Fallback): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}")
 
 
 
115
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
116
- _apify_token_exists = bool(APIFY_API_TOKEN)
117
 
118
 
119
  if _gemini_primary_enabled:
@@ -143,26 +167,8 @@ def extract_youtube_id(url):
143
 
144
 
145
  # --- Content Fetching Functions ---
146
- # (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
147
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
    """Fetch the raw HTML of *url* using browser-like request headers.

    Returns the decoded response body, or None when the fetch fails for any
    reason (non-HTML content type, HTTP error, timeout, redirect loop, etc.).
    All failures are logged rather than raised so callers can fall back.
    """
    # Impersonate a desktop browser; some sites reject obvious bot clients.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=browser_headers) as client:
            logger.debug(f"[Web Scrape] Sending request to {url}")
            response = await client.get(url)
            logger.debug(f"[Web Scrape] Received response {response.status_code} from {url}")
            response.raise_for_status()
            content_type = response.headers.get('content-type', '').lower()
            # Only HTML is worth handing to the parser downstream.
            if 'html' not in content_type:
                logger.warning(f"[Web Scrape] Non-HTML content type from {url}: {content_type}")
                return None
            try:
                return response.text
            except Exception as e:
                logger.error(f"[Web Scrape] Error decoding response for {url}: {e}")
                return None
    except httpx.HTTPStatusError as e:
        logger.error(f"[Web Scrape] HTTP error {e.response.status_code} fetching {url}: {e}")
    except httpx.TimeoutException:
        logger.error(f"[Web Scrape] Timeout error fetching {url}")
    except httpx.TooManyRedirects:
        logger.error(f"[Web Scrape] Too many redirects fetching {url}")
    except httpx.RequestError as e:
        logger.error(f"[Web Scrape] Request error fetching {url}: {e}")
    except Exception as e:
        logger.error(f"[Web Scrape] Unexpected error fetching {url}: {e}", exc_info=True)
    return None
165
 
 
166
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
167
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
168
  if not api_key: logger.error("[Supadata] API key missing."); return None
@@ -278,55 +284,128 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
278
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
279
  return transcript_text
280
 
281
async def get_website_content(url: str) -> Optional[str]:
    """Primary website extraction: scrape *url* and return its visible text.

    Fetches the page with fetch_url_content_for_scrape(), then strips
    boilerplate (scripts, nav, forms, media, ...) with BeautifulSoup in a
    worker thread. Returns the cleaned text, or None on any failure.
    """
    if not url:
        logger.error("get_website_content: No URL")
        return None
    logger.info(f"[Primary Web] Fetching website content for: {url}")
    html_content = await fetch_url_content_for_scrape(url)
    if not html_content:
        return None
    try:
        def parse_html(raw_html):
            # Runs in a thread via asyncio.to_thread — BeautifulSoup is blocking.
            soup = BeautifulSoup(raw_html, DEFAULT_PARSER)
            # Drop non-content elements before extracting text.
            for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]):
                element.extract()
            # Prefer a semantic main/article container; fall back to <body>.
            main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
            target_element = main_content if main_content else soup.body
            if not target_element:
                logger.warning(f"[Primary Web] Could not find body/main for parsing {url}")
                return None
            stripped = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
            text = " ".join(stripped)
            if not text:
                logger.warning(f"[Primary Web] Extracted text empty after clean for {url}")
                return None
            return text

        text_content = await asyncio.to_thread(parse_html, html_content)
        if text_content:
            logger.info(f"[Primary Web] Success scrape for {url} (final len: {len(text_content)})")
            return text_content
        return None
    except Exception as e:
        logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True)
        return None
 
 
301
 
 
302
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
    """Fallback website extraction via the urltotext.com REST API.

    Used when the primary scrape fails. Returns the extracted main-content
    text (stripped), or None on any error; every failure path is logged.
    """
    if not url:
        logger.error("[Fallback Web API] No URL")
        return None
    if not api_key:
        logger.error("[Fallback Web API] urltotext.com API key missing.")
        return None
    logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
    api_endpoint = "https://urltotext.com/api/v1/urltotext/"
    payload = {
        "url": url,
        "output_format": "text",
        "extract_main_content": True,
        "render_javascript": True,
        "residential_proxy": False,
    }
    headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
    try:
        # JS rendering can be slow, hence the generous 45s budget.
        async with httpx.AsyncClient(timeout=45.0) as client:
            logger.debug(f"[Fallback Web API] Sending request to urltotext.com API for {url}")
            response = await client.post(api_endpoint, headers=headers, json=payload)
            logger.debug(f"[Fallback Web API] Received status {response.status_code} from urltotext.com API for {url}")
            if response.status_code == 200:
                try:
                    data = response.json()
                    content = data.get("data", {}).get("content")
                    credits = data.get("credits_used", "N/A")
                    warning = data.get("data", {}).get("warning")
                    if warning:
                        logger.warning(f"[Fallback Web API] urltotext.com API Warning for {url}: {warning}")
                    if content:
                        logger.info(f"[Fallback Web API] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}")
                        return content.strip()
                    logger.warning(f"[Fallback Web API] urltotext.com API success but content empty for {url}. Resp: {data}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[Fallback Web API] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}")
                    return None
                except Exception as e:
                    logger.error(f"[Fallback Web API] Error processing urltotext.com success response for {url}: {e}", exc_info=True)
                    return None
            elif response.status_code in [400, 401, 402, 403, 422, 500]:
                logger.error(f"[Fallback Web API] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
                return None
            else:
                logger.error(f"[Fallback Web API] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
                return None
    except httpx.TimeoutException:
        logger.error(f"[Fallback Web API] Timeout connecting to urltotext.com API for {url}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True)
        return None
328
-
329
- # --- Summarization Functions ---
330
 
331
  async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
332
  """Internal function to call Gemini API. Returns (summary, error_message)."""
@@ -336,7 +415,7 @@ async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Opt
336
  return None, "Error: Primary AI service (Gemini) not configured/available."
337
  logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
338
 
339
- # Define prompts
340
  if summary_type == "paragraph":
341
  prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
342
  "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
@@ -412,9 +491,6 @@ async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Opt
412
 
413
  if summary:
414
  logger.info(f"[Gemini Primary] Success generating summary. Output len: {len(summary)}");
415
- # Escape Markdown for Telegram only if necessary (Removed escaping as it might conflict with plain heading)
416
- # escaped_summary = summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
417
- # Let's see if the AI respects the instruction without further escaping. If not, we might need selective escaping.
418
  return summary.strip(), None
419
  else:
420
  finish_reason = response.candidates[0].finish_reason if response.candidates else 'N/A'
@@ -433,7 +509,7 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str],
433
  return None, "Error: Fallback AI service (OpenRouter) not configured/available."
434
  logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
435
 
436
- # Define prompts
437
  if summary_type == "paragraph":
438
  prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
439
  "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
@@ -493,8 +569,6 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str],
493
  summary = message.get("content")
494
  if summary:
495
  logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Output len: {len(summary)}")
496
- # Escape Markdown for Telegram only if necessary (Removed escaping)
497
- # escaped_summary = summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
498
  return summary.strip(), None
499
  else:
500
  logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Resp: {data}")
@@ -585,14 +659,21 @@ async def generate_summary(text: str, summary_type: str) -> str:
585
  return "Sorry, an unknown error occurred during summary generation after trying all available models."
586
 
587
 
588
- # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
589
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
590
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
591
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
592
  try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
593
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
594
- content = None; user_feedback_message = None; success = False; status_message_id = message_id_to_edit; message_to_delete_later_id : Optional[int] = None
 
 
 
 
 
 
595
  try:
 
596
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
597
  if status_message_id:
598
  try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ); logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
@@ -603,67 +684,132 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
603
  if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
604
  else: raise RuntimeError("Failed to send status message after retries.")
605
  except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise
 
606
  try:
607
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
608
- is_youtube = is_youtube_url(url); logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
 
 
 
609
  if is_youtube:
610
  video_id = extract_youtube_id(url)
611
- if video_id: content = await get_youtube_transcript(video_id, url)
612
- else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
613
- if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
 
 
 
 
 
 
614
  else:
615
- content = await get_website_content(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  if not content:
617
- logger.warning(f"[Task {task_id}] Primary web scrape failed for {url}. Trying fallback API.")
618
- global URLTOTEXT_API_KEY
619
- if URLTOTEXT_API_KEY:
620
- await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
621
- content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
622
- if not content: user_feedback_message = "Sorry, I couldn't fetch content from that website using either method (blocked/inaccessible/empty?)."
623
- else: user_feedback_message = "Sorry, I couldn't fetch content from that website (blocked/inaccessible/empty?). The fallback method is not configured."
 
624
  if content:
625
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
626
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
627
- final_summary = await generate_summary(content, summary_type) # This now calls Gemini first, then OpenRouter
628
- if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
 
 
 
 
 
629
  else:
630
  max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
631
- # Sending potentially unescaped summary directly - assuming AI follows instructions
632
- # ParseMode.MARKDOWN might interpret unintended things, test needed. Consider ParseMode=None if issues arise.
633
  await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
634
- for part in summary_parts[1:]: await asyncio.sleep(0.5); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} )
635
- success = True; logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts)."); user_feedback_message = None
636
- elif user_feedback_message: logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}"); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
 
 
 
 
 
 
 
 
 
637
  except Exception as e:
638
- logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True); user_feedback_message = "Oops! Something went really wrong. Please try again later."
 
639
  try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
640
  except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
 
641
  except Exception as outer_e:
642
  logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
643
  try:
644
- if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred." )
645
  except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
 
646
  finally:
 
647
  delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
648
  if delete_target_id and bot:
649
  try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id); logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
650
  except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
 
 
651
  if background_request and hasattr(background_request, '_client') and background_request._client:
652
  try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
653
  except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
654
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
655
 
 
 
656
async def start(update: "Update", context: "ContextTypes.DEFAULT_TYPE") -> None:
    """Handle /start: greet the user with a short welcome message.

    Fix: the original computed ``user.mention_html()`` *before* checking that
    ``effective_user`` exists, so updates with no user (e.g. channel posts)
    raised AttributeError instead of being silently ignored. Guard first,
    then build the mention. (Annotations quoted so the module imports even
    when evaluated eagerly.)
    """
    user = update.effective_user
    if not user or not update.message:
        return
    mention = user.mention_html()
    logger.info(f"User {user.id} used /start.")
    await update.message.reply_html(f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!")
661
 
662
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /help: reply with usage instructions and the command list."""
    user = update.effective_user
    if not user or not update.message:
        return
    logger.info(f"User {user.id} used /help.")
    help_text = (
        "🔍 How to use this bot:\n\n"
        "1. Send me any YouTube video link or website URL.\n"
        "2. I'll ask you how you want it summarised (paragraph or points).\n"
        "3. Click the button for your choice.\n"
        "4. Wait for the summary!\n\n"
        "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n"
        "Commands:\n"
        "`/start` - Display welcome message\n"
        "`/help` - Show this help message"
    )
    await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
669
 
@@ -671,67 +817,111 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
671
  if not update.message or not update.message.text: return
672
  url = update.message.text.strip(); user = update.effective_user
673
  if not user: return
674
- if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]: logger.debug(f"Ignoring non-URL from {user.id}: {url}"); return
 
 
 
 
 
675
  logger.info(f"User {user.id} sent potential URL: {url}")
676
- context.user_data['url_to_summarize'] = url; context.user_data['original_message_id'] = update.message.message_id
 
677
  keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
678
  reply_markup = InlineKeyboardMarkup(keyboard)
679
- await update.message.reply_text( f"Okay, I see this link:\n{url}\n\nHow would you like it summarised?", reply_markup=reply_markup, disable_web_page_preview=True ) # Changed 'summarized'
680
 
681
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
682
  query = update.callback_query
683
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
684
  user = query.from_user; summary_type = query.data; query_id = query.id
685
  try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
686
- except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
687
- url = context.user_data.get('url_to_summarize'); message_id_to_edit = query.message.message_id
 
 
688
  logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
 
689
  if not url:
690
- logger.warning(f"No URL in context for user {user.id} (cb {query_id}).")
691
  try: await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
692
  except Exception as e:
693
- logger.error(f"Failed edit 'URL not found' msg: {e}")
694
- try: await context.bot.send_message(chat_id=user.id, text="Sorry, context lost. Send link again.")
695
- except Exception: pass
 
696
  return
697
 
698
- context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
 
 
 
699
 
700
- global TELEGRAM_TOKEN, GEMINI_API_KEY, OPENROUTER_API_KEY, _gemini_primary_enabled, _openrouter_fallback_enabled
 
701
  if not TELEGRAM_TOKEN:
702
- logger.critical("TG TOKEN missing!")
703
- try: await query.edit_message_text(text="❌ Bot config error.")
704
- except Exception: pass
705
  return
706
  if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
707
- logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid!")
708
- try: await query.edit_message_text(text="❌ AI config error: No models available.")
709
- except Exception: pass
710
  return
711
  elif not _gemini_primary_enabled:
712
- logger.warning("Primary AI (Gemini) is unavailable, will rely on fallback.")
713
- # No need to inform user unless fallback also fails later
714
  elif not _openrouter_fallback_enabled:
715
- logger.warning("Fallback AI (OpenRouter) is unavailable.")
716
- # No need to inform user unless primary fails later
717
 
 
718
  logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
719
- asyncio.create_task( process_summary_task( user_id=user.id, chat_id=query.message.chat_id, message_id_to_edit=message_id_to_edit, url=url, summary_type=summary_type, bot_token=TELEGRAM_TOKEN ), name=f"SummaryTask-{user.id}-{message_id_to_edit}" )
 
 
 
 
 
 
 
 
 
 
 
720
 
721
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Global PTB error handler: log update-processing exceptions.

    AttributeErrors of the "object has no attribute" kind are treated as
    known/handled noise and only logged at DEBUG level.
    """
    err = context.error
    if isinstance(err, AttributeError) and "object has no attribute" in str(err):
        logger.debug(f"Ignoring known/handled error in error_handler: {err}")
        return
    logger.error("Exception while handling an update:", exc_info=err)
 
 
 
 
725
 
 
726
async def setup_bot_config() -> Application:
    """Build the telegram Application: custom HTTP timeouts plus all handlers.

    Raises ValueError when TELEGRAM_TOKEN is not configured.
    """
    global TELEGRAM_TOKEN
    logger.info("Configuring Telegram Application...")
    if not TELEGRAM_TOKEN:
        raise ValueError("TELEGRAM_TOKEN missing.")
    # Generous pool timeout: summaries are dispatched as background tasks.
    custom_request = HTTPXRequest(
        connect_timeout=10.0,
        read_timeout=30.0,
        write_timeout=30.0,
        pool_timeout=60.0,
    )
    application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
    # Commands, free-text URL intake, and the summary-type button callback.
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
    application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    application.add_error_handler(error_handler)
    logger.info("Telegram application handlers configured.")
    return application
734
-
 
 
 
 
 
 
 
 
735
  @contextlib.asynccontextmanager
736
  async def lifespan(app: Starlette):
737
  global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
@@ -739,75 +929,156 @@ async def lifespan(app: Starlette):
739
  if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
740
  try:
741
  ptb_app = await setup_bot_config(); await ptb_app.initialize(); bot_info = await ptb_app.bot.get_me(); logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
 
742
  current_webhook_info = await ptb_app.bot.get_webhook_info()
743
  if current_webhook_info and current_webhook_info.url:
744
  logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
745
  try:
746
  if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
747
  else: logger.warning("Failed delete webhook (API returned False).")
748
- except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
749
- space_host = os.environ.get("SPACE_HOST"); webhook_path = "/webhook"; full_webhook_url = None
 
 
 
750
  if space_host:
751
- protocol = "https"; host = space_host.split('://')[-1]; full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
752
- if full_webhook_url:
753
- logger.info(f"Setting webhook: {full_webhook_url}"); set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
754
- if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
755
- await asyncio.sleep(1.0)
756
- try:
757
- await ptb_app.bot.set_webhook(**set_webhook_args); webhook_info = await ptb_app.bot.get_webhook_info()
758
- if webhook_info.url == full_webhook_url: logger.info(f"Webhook set: URL='{webhook_info.url}', Secret={bool(WEBHOOK_SECRET)}")
759
- else: logger.error(f"Webhook URL mismatch! Expected '{full_webhook_url}', Got '{webhook_info.url}'"); raise RuntimeError("Webhook URL mismatch.")
760
- await ptb_app.start(); logger.info("PTB Application started (webhook mode).")
761
- except Exception as e: logger.error(f"FATAL: Failed set webhook: {e}", exc_info=True); raise RuntimeError(f"Failed set webhook: {e}") from e
762
- else: logger.critical("Could not construct webhook URL."); raise RuntimeError("Webhook URL undetermined.")
763
- else: logger.critical("SPACE_HOST missing."); raise RuntimeError("SPACE_HOST env var missing.")
764
- logger.info("ASGI Lifespan: Startup complete."); yield
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
  except Exception as startup_err:
766
  logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
 
767
  if ptb_app:
768
  if ptb_app.running: await ptb_app.stop()
769
  await ptb_app.shutdown()
770
- raise
771
- finally:
 
772
  logger.info("ASGI Lifespan: Shutdown initiated...")
773
  if ptb_app:
774
- if ptb_app.running: logger.info("Stopping PTB..."); await ptb_app.stop()
775
- logger.info("Shutting down PTB..."); await ptb_app.shutdown(); logger.info("PTB Application shut down.")
776
- else: logger.info("PTB application not initialized or failed.")
 
 
 
 
 
777
  logger.info("ASGI Lifespan: Shutdown complete.")
778
 
779
  async def health_check(request: Request) -> PlainTextResponse:
780
- global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled
781
  bot_status = "Not Initialized"
782
  if ptb_app and ptb_app.bot:
783
  try:
784
- if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
 
 
785
  else: bot_status = "Initialized/Not running"
786
  except Exception as e: bot_status = f"Error checking status: {e}"
787
- return PlainTextResponse( f"TG Bot Summariser - Status: {bot_status}\n" # Changed 'Summarizer'
788
- f"Primary Model: {GEMINI_MODEL if _gemini_primary_enabled else 'N/A (Disabled)'}\n"
789
- f"Fallback Model: {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'N/A (Disabled)'}\n"
790
- f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}" )
 
 
 
 
 
 
 
 
 
 
791
 
792
  async def telegram_webhook(request: Request) -> Response:
793
  global WEBHOOK_SECRET
794
- if not ptb_app: logger.error("Webhook recv but PTB not initialized."); return PlainTextResponse('Bot not initialized', status_code=503)
795
- if not ptb_app.running: logger.warning("Webhook recv but PTB not running."); return PlainTextResponse('Bot not running', status_code=503)
 
 
 
 
 
796
  try:
 
797
  if WEBHOOK_SECRET:
798
  token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
799
- if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook invalid secret. Header: '{token_header}'"); return Response(content="Invalid secret token", status_code=403)
800
- update_data = await request.json(); update = Update.de_json(data=update_data, bot=ptb_app.bot); logger.debug(f"Processing update_id: {update.update_id} via webhook")
801
- await ptb_app.process_update(update); return Response(status_code=200) # OK
802
- except json.JSONDecodeError: logger.error("Webhook invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
803
- except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
804
-
805
- app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
806
- logger.info("Starlette ASGI application created with native routes.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
807
 
 
808
  if __name__ == '__main__':
809
  import uvicorn
810
- logger.warning("Running in development mode using Uvicorn directly")
811
  log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
 
812
  local_port = int(os.environ.get('PORT', 8080))
 
 
 
 
813
  uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)
 
1
+ # main.py (Modified to add crawl4ai and adjust fetching logic)
2
  import os
3
  import re
4
  import logging
 
52
  _gemini_available = False
53
  # logger will be defined later, log warning after logger setup
54
 
55
+ # --- Crawl4AI (New Primary Web Scraper) ---
56
+ try:
57
+ from crawl4ai import AsyncWebCrawler
58
+ _crawl4ai_available = True
59
+ except ImportError:
60
+ AsyncWebCrawler = None
61
+ _crawl4ai_available = False
62
+ # logger will be defined later
63
+
64
 
65
  # --- Logging Setup ---
66
  logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
 
72
  logging.getLogger('uvicorn').setLevel(logging.INFO)
73
  logging.getLogger('starlette').setLevel(logging.INFO)
74
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
75
+ # Reduce crawl4ai logging noise if needed
76
+ if _crawl4ai_available:
77
+ logging.getLogger("crawl4ai").setLevel(logging.WARNING) # Or INFO for more detail
78
+ logging.getLogger("playwright").setLevel(logging.WARNING)
79
  logger = logging.getLogger(__name__)
80
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
81
  if not _gemini_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.")
82
+ if not _crawl4ai_available: logger.warning("crawl4ai library not found. Primary website scraping will be disabled.")
83
 
84
 
85
  # --- Global variable for PTB app ---
 
95
 
96
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
97
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Now Fallback
98
+ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # Fallback Web 2
99
+ SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY') # Fallback YT 1
100
+ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # Fallback YT 2
101
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
102
+ GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Primary Summarizer
103
 
104
  # Models (User can still configure via env vars)
105
+ OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Summarizer Model
106
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
107
+ GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary Summarizer Model
108
 
109
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
110
  if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.")
 
115
  elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found or empty. Gemini disabled.")
116
 
117
  _openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
118
+ if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback summarization disabled.")
119
 
120
+ _crawl4ai_primary_web_enabled = _crawl4ai_available
121
+ if not _crawl4ai_primary_web_enabled: logger.warning("⚠️ WARNING: crawl4ai library not found. Primary web scraping disabled.")
122
+
123
+ _bs4_fallback_web_enabled = True # Assumes bs4 is always available
124
+ _urltotext_fallback_web_enabled = bool(URLTOTEXT_API_KEY)
125
+ if not _urltotext_fallback_web_enabled: logger.info("ℹ️ INFO: URLTOTEXT_API_KEY not found. Secondary web fallback disabled.")
126
+
127
+ # Fallback YT checks
128
+ if not SUPADATA_API_KEY: logger.info("ℹ️ INFO: SUPADATA_API_KEY not found. First YT fallback disabled.")
129
+ _apify_token_exists = bool(APIFY_API_TOKEN)
130
+ if not _apify_token_exists: logger.info("ℹ️ INFO: APIFY_API_TOKEN not found. Second YT fallback disabled.")
131
 
 
 
 
132
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
133
 
134
  logger.info("Secret loading and configuration check finished.")
135
+ logger.info(f"Primary Web Scraper: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'DISABLED'}")
136
+ logger.info(f"Fallback Web Scraper 1: {'BeautifulSoup' if _bs4_fallback_web_enabled else 'DISABLED'}")
137
+ logger.info(f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_web_enabled else 'DISABLED'}")
138
+ logger.info(f"Using Gemini Model (Primary Summarizer): {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}")
139
+ logger.info(f"Using OpenRouter Model (Fallback Summarizer): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}")
140
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
 
141
 
142
 
143
  if _gemini_primary_enabled:
 
167
 
168
 
169
  # --- Content Fetching Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
+ # --- YouTube Fetching (Unchanged) ---
172
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
173
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
174
  if not api_key: logger.error("[Supadata] API key missing."); return None
 
284
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
285
  return transcript_text
286
 
287
+ # --- Website Content Fetching ---
288
+
289
+ # NEW: Primary Method using Crawl4AI
290
+ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
291
+ """Fetches website content using Crawl4AI. Returns Markdown."""
292
+ global _crawl4ai_primary_web_enabled
293
+ if not _crawl4ai_primary_web_enabled:
294
+ logger.warning("[Crawl4AI Primary] Called but disabled/unavailable.")
295
+ return None
296
+ if not url: logger.error("[Crawl4AI Primary] No URL provided"); return None
297
+
298
+ logger.info(f"[Crawl4AI Primary] Attempting fetch for: {url}")
299
+ try:
300
+ # Using async with for proper resource cleanup
301
+ async with AsyncWebCrawler(headless=True) as crawler: # Headless is generally preferred for server environments
302
+ # Timeout can be added here if needed: crawler_params={"timeout": 60000} # milliseconds
303
+ result = await crawler.arun(url=url)
304
+ if result and result.markdown:
305
+ logger.info(f"[Crawl4AI Primary] Success via Crawl4AI for {url}. Markdown length: {len(result.markdown)}")
306
+ return result.markdown.strip()
307
+ elif result and not result.markdown:
308
+ logger.warning(f"[Crawl4AI Primary] Crawl4AI ran but returned empty markdown for {url}.")
309
+ return None
310
+ else:
311
+ logger.warning(f"[Crawl4AI Primary] Crawl4AI returned no result object for {url}.")
312
+ return None
313
+ except ImportError:
314
+ logger.error("[Crawl4AI Primary] Import Error - library might be missing.")
315
+ _crawl4ai_primary_web_enabled = False # Disable if import fails at runtime
316
+ return None
317
+ except Exception as e:
318
+ # Catch potential Playwright errors, timeouts, or other issues
319
+ logger.error(f"[Crawl4AI Primary] Error during Crawl4AI execution for {url}: {e}", exc_info=True)
320
+ return None
321
+
322
+ # HELPER: Used by Fallback 1 (BS4) - no changes needed here
323
+ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
324
+ headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
325
+ try:
326
+ async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
327
+ logger.debug(f"[Web Scrape Helper] Sending request to {url}")
328
+ response = await client.get(url)
329
+ logger.debug(f"[Web Scrape Helper] Received response {response.status_code} from {url}")
330
+ response.raise_for_status()
331
+ content_type = response.headers.get('content-type', '').lower()
332
+ if 'html' not in content_type: logger.warning(f"[Web Scrape Helper] Non-HTML content type from {url}: {content_type}"); return None
333
+ try: return response.text
334
+ except Exception as e: logger.error(f"[Web Scrape Helper] Error decoding response for {url}: {e}"); return None
335
+ except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape Helper] HTTP error {e.response.status_code} fetching {url}: {e}")
336
+ except httpx.TimeoutException: logger.error(f"[Web Scrape Helper] Timeout error fetching {url}")
337
+ except httpx.TooManyRedirects: logger.error(f"[Web Scrape Helper] Too many redirects fetching {url}")
338
+ except httpx.RequestError as e: logger.error(f"[Web Scrape Helper] Request error fetching {url}: {e}")
339
+ except Exception as e: logger.error(f"[Web Scrape Helper] Unexpected error fetching {url}: {e}", exc_info=True)
340
+ return None
341
+
342
+ # Fallback 1: Direct BS4 Scraping (Renamed original function)
343
+ async def get_website_content_via_bs4(url: str) -> Optional[str]:
344
+ """Fetches and extracts text content using BeautifulSoup (Fallback 1)."""
345
+ global _bs4_fallback_web_enabled
346
+ if not _bs4_fallback_web_enabled:
347
+ logger.warning("[BS4 Fallback] Called but disabled.") # Should not happen unless manually disabled
348
+ return None
349
+ if not url: logger.error("[BS4 Fallback] No URL provided"); return None
350
+ logger.info(f"[BS4 Fallback] Fetching website content for: {url}")
351
+ html_content = await fetch_url_content_for_scrape(url) # Use the helper
352
+ if not html_content:
353
+ logger.warning(f"[BS4 Fallback] fetch_url_content_for_scrape failed for {url}")
354
+ return None
355
  try:
356
  def parse_html(content):
357
  soup = BeautifulSoup(content, DEFAULT_PARSER)
358
+ # Keep the existing cleaning logic
359
  for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]): element.extract()
360
  main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
361
  target_element = main_content if main_content else soup.body
362
+ if not target_element: logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}"); return None
363
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
364
  text = " ".join(lines)
365
+ if not text: logger.warning(f"[BS4 Fallback] Extracted text empty after clean for {url}"); return None
366
  return text
367
+
368
  text_content = await asyncio.to_thread(parse_html, html_content)
369
+ if text_content: logger.info(f"[BS4 Fallback] Success scrape for {url} (final len: {len(text_content)})"); return text_content
370
+ else:
371
+ logger.warning(f"[BS4 Fallback] parse_html returned None for {url}")
372
+ return None
373
+ except Exception as e: logger.error(f"[BS4 Fallback] Error scraping/parsing {url}: {e}", exc_info=True); return None
374
 
375
+ # Fallback 2: urltotext.com API (Unchanged function)
376
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
377
+ """Fetches website content using urltotext.com API (Fallback 2)."""
378
+ global _urltotext_fallback_web_enabled
379
+ if not _urltotext_fallback_web_enabled:
380
+ logger.warning("[urltotext API Fallback] Called but disabled (no API key).")
381
+ return None
382
+ if not url: logger.error("[urltotext API Fallback] No URL"); return None
383
+ if not api_key: logger.error("[urltotext API Fallback] urltotext.com API key missing."); return None # Redundant check but safe
384
+ logger.info(f"[urltotext API Fallback] Attempting fetch for: {url} using urltotext.com API")
385
  api_endpoint = "https://urltotext.com/api/v1/urltotext/"
386
  payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
387
  headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" }
388
  try:
389
  async with httpx.AsyncClient(timeout=45.0) as client:
390
+ logger.debug(f"[urltotext API Fallback] Sending request to urltotext.com API for {url}")
391
  response = await client.post(api_endpoint, headers=headers, json=payload)
392
+ logger.debug(f"[urltotext API Fallback] Received status {response.status_code} from urltotext.com API for {url}")
393
  if response.status_code == 200:
394
  try:
395
  data = response.json()
396
  content = data.get("data", {}).get("content"); credits = data.get("credits_used", "N/A"); warning = data.get("data", {}).get("warning")
397
+ if warning: logger.warning(f"[urltotext API Fallback] urltotext.com API Warning for {url}: {warning}")
398
+ if content: logger.info(f"[urltotext API Fallback] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}"); return content.strip()
399
+ else: logger.warning(f"[urltotext API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
400
+ except json.JSONDecodeError: logger.error(f"[urltotext API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
401
+ except Exception as e: logger.error(f"[urltotext API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
402
+ elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[urltotext API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
403
+ else: logger.error(f"[urltotext API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
404
+ except httpx.TimeoutException: logger.error(f"[urltotext API Fallback] Timeout connecting to urltotext.com API for {url}"); return None
405
+ except httpx.RequestError as e: logger.error(f"[urltotext API Fallback] Request error connecting to urltotext.com API for {url}: {e}"); return None
406
+ except Exception as e: logger.error(f"[urltotext API Fallback] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
407
+
408
+ # --- Summarization Functions (Unchanged) ---
409
 
410
  async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
411
  """Internal function to call Gemini API. Returns (summary, error_message)."""
 
415
  return None, "Error: Primary AI service (Gemini) not configured/available."
416
  logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
417
 
418
+ # Define prompts (Keep existing prompts)
419
  if summary_type == "paragraph":
420
  prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
421
  "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
 
491
 
492
  if summary:
493
  logger.info(f"[Gemini Primary] Success generating summary. Output len: {len(summary)}");
 
 
 
494
  return summary.strip(), None
495
  else:
496
  finish_reason = response.candidates[0].finish_reason if response.candidates else 'N/A'
 
509
  return None, "Error: Fallback AI service (OpenRouter) not configured/available."
510
  logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
511
 
512
+ # Define prompts (Keep existing prompts)
513
  if summary_type == "paragraph":
514
  prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
515
  "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
 
569
  summary = message.get("content")
570
  if summary:
571
  logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Output len: {len(summary)}")
 
 
572
  return summary.strip(), None
573
  else:
574
  logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Resp: {data}")
 
659
  return "Sorry, an unknown error occurred during summary generation after trying all available models."
660
 
661
 
662
+ # --- Main Task Processing (Modified Web Fetching Logic) ---
663
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
664
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
665
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
666
  try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
667
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
668
+
669
+ content: Optional[str] = None
670
+ user_feedback_message: Optional[str] = None
671
+ success = False
672
+ status_message_id = message_id_to_edit
673
+ message_to_delete_later_id : Optional[int] = None
674
+
675
  try:
676
+ # Send initial "Processing..." message (or edit existing)
677
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
678
  if status_message_id:
679
  try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ); logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
 
684
  if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
685
  else: raise RuntimeError("Failed to send status message after retries.")
686
  except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise
687
+
688
  try:
689
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
690
+ is_youtube = is_youtube_url(url)
691
+ logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
692
+
693
+ # --- YouTube Processing (Unchanged) ---
694
  if is_youtube:
695
  video_id = extract_youtube_id(url)
696
+ if video_id:
697
+ content = await get_youtube_transcript(video_id, url) # Tries lib -> Supadata -> Apify
698
+ else:
699
+ user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
700
+
701
+ if not content and not user_feedback_message:
702
+ user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
703
+
704
+ # --- Website Processing (NEW Logic) ---
705
  else:
706
+ # Method 1: Crawl4AI (Primary)
707
+ if _crawl4ai_primary_web_enabled:
708
+ logger.info(f"[Task {task_id}] Trying primary web method: Crawl4AI")
709
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
710
+ content = await get_website_content_via_crawl4ai(url)
711
+ if content:
712
+ logger.info(f"[Task {task_id}] Success via Crawl4AI for {url} (len: {len(content)})")
713
+ else:
714
+ logger.warning(f"[Task {task_id}] Crawl4AI failed or returned empty for {url}.")
715
+ else:
716
+ logger.warning(f"[Task {task_id}] Crawl4AI is disabled. Skipping.")
717
+
718
+ # Method 2: BeautifulSoup (Fallback 1)
719
+ if not content and _bs4_fallback_web_enabled:
720
+ logger.warning(f"[Task {task_id}] Trying fallback web method 1: BeautifulSoup")
721
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
722
+ content = await get_website_content_via_bs4(url) # Use the renamed function
723
+ if content:
724
+ logger.info(f"[Task {task_id}] Success via BS4 scrape for {url} (len: {len(content)})")
725
+ else:
726
+ logger.warning(f"[Task {task_id}] BS4 scrape failed or returned empty for {url}.")
727
+
728
+ # Method 3: urltotext.com API (Fallback 2)
729
+ if not content and _urltotext_fallback_web_enabled:
730
+ logger.warning(f"[Task {task_id}] Trying fallback web method 2: urltotext.com API")
731
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
732
+ content = await get_website_content_via_api(url, URLTOTEXT_API_KEY) # API key checked inside function
733
+ if content:
734
+ logger.info(f"[Task {task_id}] Success via urltotext.com API for {url} (len: {len(content)})")
735
+ else:
736
+ logger.warning(f"[Task {task_id}] urltotext.com API failed or returned empty for {url}.")
737
+
738
+ # Final check if any website method succeeded
739
  if not content:
740
+ methods_tried = []
741
+ if _crawl4ai_primary_web_enabled: methods_tried.append("Crawl4AI")
742
+ if _bs4_fallback_web_enabled: methods_tried.append("BS4")
743
+ if _urltotext_fallback_web_enabled: methods_tried.append("API")
744
+ tried_str = ", ".join(methods_tried) if methods_tried else "configured methods"
745
+ user_feedback_message = f"Sorry, I couldn't fetch content from that website using any available method ({tried_str}). It might be blocked, inaccessible, or empty."
746
+
747
+ # --- Summarization ---
748
  if content:
749
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
750
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
751
+ # NOTE: `content` might be Markdown (from Crawl4AI) or plain text (from others).
752
+ # The LLM prompts should handle this reasonably well.
753
+ final_summary = await generate_summary(content, summary_type) # Calls Gemini -> OpenRouter
754
+
755
+ if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
756
+ user_feedback_message = final_summary # Pass AI error message to user
757
+ logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
758
  else:
759
  max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
760
+ # Send summary without explicit Markdown parsing, assuming LLM followed instructions
761
+ # for plain headings and standard bullet points. Using parse_mode=None.
762
  await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
763
+ for part in summary_parts[1:]:
764
+ await asyncio.sleep(0.5)
765
+ await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} )
766
+ success = True
767
+ logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
768
+ user_feedback_message = None # Clear any previous fetch error if summary succeeded
769
+
770
+ # --- Send Feedback if Fetching Failed ---
771
+ elif user_feedback_message:
772
+ logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}")
773
+ await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
774
+
775
  except Exception as e:
776
+ logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
777
+ user_feedback_message = "Oops! Something went really wrong while processing your request. Please try again later."
778
  try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
779
  except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
780
+
781
  except Exception as outer_e:
782
  logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
783
  try:
784
+ if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred processing your request." )
785
  except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
786
+
787
  finally:
788
+ # Delete the "Processing..." or button message
789
  delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
790
  if delete_target_id and bot:
791
  try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id); logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
792
  except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
793
+
794
+ # Close the background bot's HTTP client
795
  if background_request and hasattr(background_request, '_client') and background_request._client:
796
  try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
797
  except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
798
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
799
 
800
+
801
+ # --- Bot Handlers (Unchanged) ---
802
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
803
  user = update.effective_user; mention = user.mention_html()
804
  if not user or not update.message: return
805
  logger.info(f"User {user.id} used /start.")
806
+ await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" )
807
 
808
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /help: describe the summarisation workflow and available commands."""
    sender = update.effective_user
    if sender is None or update.message is None:
        return
    logger.info(f"User {sender.id} used /help.")
    usage = (
        "🔍 How to use this bot:\n\n"
        "1. Send me any YouTube video link or website URL.\n"
        "2. I'll ask you how you want it summarised (paragraph or points).\n"
        "3. Click the button for your choice.\n"
        "4. Wait for the summary!\n\n"
        "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n"
        "Commands:\n"
        "`/start` - Display welcome message\n"
        "`/help` - Show this help message"
    )
    await update.message.reply_text(usage, parse_mode=ParseMode.MARKDOWN)
815
 
 
817
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle a plain-text message that may contain a URL to summarise.

    Stores the URL in user_data and replies with Paragraph/Points choice buttons.
    """
    if not update.message or not update.message.text:
        return
    url = update.message.text.strip()
    user = update.effective_user
    if not user:
        return
    # Basic URL validation: http(s) scheme plus a dot somewhere after it.
    # The previous check used `'.' not in url[8:]`, which for "http://"
    # (7 chars) also skipped the first character of the host and so wrongly
    # rejected short hosts such as "http://a.b".
    has_scheme = url.startswith('http://') or url.startswith('https://')
    after_scheme = url.split('://', 1)[-1]
    if not has_scheme or '.' not in after_scheme:
        logger.debug(f"Ignoring non-URL message from {user.id}")
        # Optional: reply if it looks like they tried to send something else?
        # await update.message.reply_text("Please send a valid website URL (starting with http:// or https://) or a YouTube link.")
        return
    logger.info(f"User {user.id} sent potential URL: {url}")
    context.user_data['url_to_summarize'] = url
    context.user_data['original_message_id'] = update.message.message_id  # kept for potential later use
    keyboard = [[
        InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"),
        InlineKeyboardButton("Points Summary", callback_data="points"),
    ]]
    reply_markup = InlineKeyboardMarkup(keyboard)
    await update.message.reply_text(
        f"Okay, I see this link:\n{url}\n\nHow would you like it summarised?",
        reply_markup=reply_markup,
        disable_web_page_preview=True,
    )
832
 
833
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle the Paragraph/Points button press: validate context & config, then
    schedule the background summarisation task.

    The button message itself is left untouched here; the scheduled task is
    responsible for editing/deleting it.
    """
    query = update.callback_query
    if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
    user = query.from_user; summary_type = query.data; query_id = query.id
    # Acknowledge the callback promptly so the client stops showing a spinner.
    try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
    except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True) # Log but continue

    url = context.user_data.get('url_to_summarize')
    message_id_to_edit = query.message.message_id # The message with the buttons
    logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")

    if not url:
        # No stored URL: the button is stale (user_data expired or bot restarted).
        logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Might be an old button.")
        try: await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
        except Exception as e:
            logger.error(f"Failed to edit 'URL not found' message: {e}")
            # Attempt to send a new message as a fallback
            try: await context.bot.send_message(chat_id=user.id, text="Sorry, the context for your previous request seems to have expired. Please send the link again.")
            except Exception as send_e: logger.error(f"Failed even to send new message about lost context: {send_e}")
        return

    # Clear context *after* checking it exists, so a double-click cannot
    # schedule the same URL twice.
    context.user_data.pop('url_to_summarize', None)
    context.user_data.pop('original_message_id', None) # Clear original ID too
    logger.debug(f"Cleared URL context for user {user.id}")

    # Check critical configurations before scheduling task
    global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
    if not TELEGRAM_TOKEN:
        logger.critical("TELEGRAM_TOKEN missing! Cannot schedule task.")
        try: await query.edit_message_text(text="❌ Bot configuration error (Token). Task cannot be started.")
        except Exception: pass # Ignore if edit fails
        return
    if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
        # Neither summariser backend is usable — refuse up front rather than
        # failing inside the background task.
        logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid! Cannot schedule task.")
        try: await query.edit_message_text(text="❌ AI configuration error: No summarization models available. Task cannot be started.")
        except Exception: pass # Ignore if edit fails
        return
    elif not _gemini_primary_enabled:
        logger.warning("Primary AI (Gemini) is unavailable, will rely on fallback for summarization.")
        # User will be notified by generate_summary if fallback also fails
    elif not _openrouter_fallback_enabled:
        logger.warning("Fallback AI (OpenRouter) is unavailable for summarization.")
        # User will be notified by generate_summary if primary fails

    # Schedule the background task (fire-and-forget; named for debuggability).
    logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
    asyncio.create_task(
        process_summary_task(
            user_id=user.id,
            chat_id=query.message.chat_id,
            message_id_to_edit=message_id_to_edit, # Pass the button message ID to edit/delete
            url=url,
            summary_type=summary_type,
            bot_token=TELEGRAM_TOKEN
        ),
        name=f"SummaryTask-{user.id}-{message_id_to_edit}"
    )
    # Don't edit the message here; the task will handle it immediately.
 
893
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Global PTB error handler: log unexpected exceptions from update handling."""
    err = context.error
    # Quietly drop a known, benign AttributeError pattern to keep logs clean.
    if isinstance(err, AttributeError) and "object has no attribute" in str(err):
        logger.debug(f"Ignoring known/handled error in error_handler: {err}")
        return

    logger.error("Exception while handling an update:", exc_info=err)
    # Optionally, try to inform the user if it's a direct message context
    # if isinstance(update, Update) and update.effective_chat:
    #     try: await context.bot.send_message(chat_id=update.effective_chat.id, text="An internal error occurred.")
    #     except Exception: logger.error("Failed to send error message to user.")
 
906
# --- Application Setup ---
async def setup_bot_config() -> Application:
    """Build the PTB Application: custom HTTP client, command/message/callback
    handlers, and the global error handler."""
    global TELEGRAM_TOKEN
    logger.info("Configuring Telegram Application...")
    if not TELEGRAM_TOKEN:
        raise ValueError("TELEGRAM_TOKEN missing.")
    # Custom HTTPX timeouts for calls to the Telegram API.
    request = HTTPXRequest(connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0)
    application = Application.builder().token(TELEGRAM_TOKEN).request(request).build()
    # Register handlers: commands, non-command text (potential URLs),
    # and the summary-type button callbacks.
    for handler in (
        CommandHandler("start", start),
        CommandHandler("help", help_command),
        MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url),
        CallbackQueryHandler(handle_summary_type_callback),
    ):
        application.add_handler(handler)
    application.add_error_handler(error_handler)
    logger.info("Telegram application handlers configured.")
    return application
923
+
924
# --- ASGI Lifespan & Webhook ---
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
    """ASGI lifespan context: on startup, initialize the PTB application and
    register the Telegram webhook (URL derived from SPACE_HOST); on shutdown,
    stop and shut down PTB cleanly.

    Raises RuntimeError if the webhook URL cannot be determined or set —
    this intentionally aborts server startup.
    """
    global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN

    if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
    try:
        ptb_app = await setup_bot_config(); await ptb_app.initialize(); bot_info = await ptb_app.bot.get_me(); logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
        # Webhook setup: delete any stale webhook first so the new one is clean.
        current_webhook_info = await ptb_app.bot.get_webhook_info()
        if current_webhook_info and current_webhook_info.url:
            logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
            try:
                if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
                else: logger.warning("Failed delete webhook (API returned False).")
            except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1) # Brief pause before setting new one

        space_host = os.environ.get("SPACE_HOST") # Provided by Hugging Face Spaces
        webhook_path = "/webhook" # Matches the Starlette route below
        full_webhook_url = None
        if space_host:
            protocol = "https"
            host = space_host.split('://')[-1] # Remove potential protocol prefix from env var
            full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"

        if full_webhook_url:
            logger.info(f"Setting webhook: {full_webhook_url}")
            set_webhook_args: Dict[str, Any] = {
                "url": full_webhook_url,
                "allowed_updates": Update.ALL_TYPES,
                "drop_pending_updates": True
            }
            if WEBHOOK_SECRET:
                # Telegram echoes this back in X-Telegram-Bot-Api-Secret-Token;
                # telegram_webhook() validates it on every request.
                set_webhook_args["secret_token"] = WEBHOOK_SECRET
                logger.info("Using webhook secret token.")

            await asyncio.sleep(1.0) # Short delay before setting webhook

            try:
                await ptb_app.bot.set_webhook(**set_webhook_args)
                webhook_info = await ptb_app.bot.get_webhook_info() # Verify
                if webhook_info.url == full_webhook_url:
                    logger.info(f"Webhook set successfully: URL='{webhook_info.url}', Secret Configured={bool(WEBHOOK_SECRET)}")
                else:
                    logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got '{webhook_info.url}'. Check SPACE_HOST env var.")
                    raise RuntimeError("Webhook URL mismatch after setting.")

                await ptb_app.start() # Start listening for updates via webhook
                logger.info("PTB Application started (webhook mode).")

            except Exception as e:
                logger.critical(f"FATAL: Failed to set webhook: {e}", exc_info=True)
                raise RuntimeError(f"Failed to set webhook: {e}") from e
        else:
            logger.critical("Could not construct webhook URL. SPACE_HOST environment variable might be missing or invalid.")
            raise RuntimeError("Webhook URL could not be determined.")

        logger.info("ASGI Lifespan: Startup complete."); yield # Application runs here

    except Exception as startup_err:
        logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
        # Ensure cleanup even if startup fails midway
        if ptb_app:
            if ptb_app.running: await ptb_app.stop()
            await ptb_app.shutdown()
        raise # Reraise the exception to stop the ASGI server

    finally: # Shutdown phase
        logger.info("ASGI Lifespan: Shutdown initiated...")
        if ptb_app:
            # stop() only if running; shutdown() always, to release resources.
            if ptb_app.running:
                logger.info("Stopping PTB application...")
                await ptb_app.stop()
            logger.info("Shutting down PTB application...")
            await ptb_app.shutdown()
            logger.info("PTB Application shut down.")
        else:
            logger.info("PTB application was not initialized or failed during startup.")
        logger.info("ASGI Lifespan: Shutdown complete.")
1003
 
1004
async def health_check(request: Request) -> PlainTextResponse:
    """Root endpoint: report bot status and which web-scrape / summariser /
    YouTube-transcript backends are currently enabled."""
    global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _bs4_fallback_web_enabled, _urltotext_fallback_web_enabled
    bot_status = "Not Initialized"
    if ptb_app and ptb_app.bot:
        try:
            if ptb_app.running:
                me = await ptb_app.bot.get_me()
                bot_status = f"Running (@{me.username})"
            else:
                bot_status = "Initialized/Not running"
        except Exception as e:
            bot_status = f"Error checking status: {e}"

    # Assemble the report line by line; blank entries become the blank
    # separator lines between sections.
    report = [
        f"TG Bot Summariser - Status: {bot_status}",
        "",
        f"Web Primary: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'DISABLED'}",
        f"Web Fallback 1: {'BS4' if _bs4_fallback_web_enabled else 'DISABLED'}",
        f"Web Fallback 2: {'API' if _urltotext_fallback_web_enabled else 'DISABLED'}",
        "",
        f"Summarizer Primary: {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}",
        f"Summarizer Fallback: {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}",
        "",
        f"YT Fallback 1: {'Supadata' if SUPADATA_API_KEY else 'DISABLED'}",
        f"YT Fallback 2: {APIFY_ACTOR_ID if _apify_token_exists else 'DISABLED'}",
    ]
    return PlainTextResponse("\n".join(report))
1028
 
1029
async def telegram_webhook(request: Request) -> Response:
    """Receive a Telegram update via webhook, validate the secret token, and
    hand the update to the PTB application."""
    global WEBHOOK_SECRET
    if not ptb_app:
        logger.error("Webhook received but PTB application is not initialized.")
        return PlainTextResponse('Bot not initialized', status_code=503) # Service Unavailable
    if not ptb_app.running:
        logger.warning("Webhook received but PTB application is not running.")
        return PlainTextResponse('Bot not running', status_code=503) # Service Unavailable

    try:
        # Validate the secret token if one is configured.
        if WEBHOOK_SECRET:
            token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
            if token_header != WEBHOOK_SECRET:
                logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'")
                return Response(content="Invalid secret token", status_code=403) # Forbidden

        # Decode and dispatch the update.
        payload = await request.json()
        tg_update = Update.de_json(data=payload, bot=ptb_app.bot)
        logger.debug(f"Processing update_id: {tg_update.update_id} via webhook")
        await ptb_app.process_update(tg_update)
        return Response(status_code=200) # OK - Tell Telegram we received it
    except json.JSONDecodeError:
        logger.error("Webhook received invalid JSON payload.")
        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
    except Exception as e:
        logger.error(f"Error processing webhook update: {e}", exc_info=True)
        # Still return 200 so Telegram does not endlessly retry a potentially
        # poison-pill update; the error is logged for debugging.
        return Response(status_code=200)
1061
+
1062
# --- ASGI App Definition ---
# Starlette wiring: `lifespan` drives PTB startup/shutdown; only two routes —
# a health/status page at "/" and the Telegram webhook receiver at "/webhook".
app = Starlette(
    debug=False, # Keep debug False in production
    lifespan=lifespan,
    routes=[
        Route("/", endpoint=health_check, methods=["GET"]),
        Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
    ]
)
logger.info("Starlette ASGI application created with health check and webhook routes.")
1072
 
1073
# --- Direct Run (for local testing) ---
if __name__ == '__main__':
    import uvicorn
    logger.warning("Running in development mode using Uvicorn directly (not for production)")
    log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
    # Use PORT from env var (like HF Spaces provides) or default to 8080 for local dev
    local_port = int(os.environ.get('PORT', 8080))
    # NOTE(review): lifespan() raises RuntimeError when SPACE_HOST is unset, so a
    # plain local run will abort at startup — export SPACE_HOST (e.g. an ngrok
    # host) or temporarily disable the webhook logic in lifespan for local tests.
    uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)