fmab777 commited on
Commit
f4b250d
·
verified ·
1 Parent(s): 85d65ef

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +453 -133
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Corrected SyntaxError at line 424 - Now with Gemini 2.0 as primary)
2
  import os
3
  import re
4
  import logging
@@ -7,6 +7,7 @@ import json
7
  import html
8
  import contextlib
9
  import traceback
 
10
  from typing import Optional, Dict, Any, Tuple
11
 
12
  # --- Frameworks ---
@@ -81,11 +82,12 @@ def get_secret(secret_name):
81
 
82
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
83
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Now Fallback
84
- URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
85
- SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
86
- APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
 
87
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
88
- GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Now Primary
89
 
90
  # Models (User can still configure via env vars)
91
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
@@ -95,6 +97,7 @@ GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary
95
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
96
  if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.")
97
  if not OPENROUTER_API_KEY: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback summarization will fail.")
 
98
 
99
  _gemini_primary_enabled = _gemini_available and bool(GEMINI_API_KEY)
100
  if not _gemini_available: logger.warning("⚠️ WARNING: google-generativeai library missing. Gemini disabled.")
@@ -104,9 +107,9 @@ _openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
104
  if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback disabled.")
105
 
106
 
107
- if not URLTOTEXT_API_KEY: pass
108
- if not SUPADATA_API_KEY: pass
109
- if not APIFY_API_TOKEN: pass
110
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
111
 
112
  logger.info("Secret loading and configuration check finished.")
@@ -114,6 +117,8 @@ logger.info(f"Using Gemini Model (Primary): {GEMINI_MODEL if _gemini_primary_ena
114
  logger.info(f"Using OpenRouter Model (Fallback): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}")
115
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
116
  _apify_token_exists = bool(APIFY_API_TOKEN)
 
 
117
 
118
 
119
  if _gemini_primary_enabled:
@@ -143,26 +148,8 @@ def extract_youtube_id(url):
143
 
144
 
145
  # --- Content Fetching Functions ---
146
- # (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
147
- async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
148
- headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
149
- try:
150
- async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
151
- logger.debug(f"[Web Scrape] Sending request to {url}")
152
- response = await client.get(url)
153
- logger.debug(f"[Web Scrape] Received response {response.status_code} from {url}")
154
- response.raise_for_status()
155
- content_type = response.headers.get('content-type', '').lower()
156
- if 'html' not in content_type: logger.warning(f"[Web Scrape] Non-HTML content type from {url}: {content_type}"); return None
157
- try: return response.text
158
- except Exception as e: logger.error(f"[Web Scrape] Error decoding response for {url}: {e}"); return None
159
- except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape] HTTP error {e.response.status_code} fetching {url}: {e}")
160
- except httpx.TimeoutException: logger.error(f"[Web Scrape] Timeout error fetching {url}")
161
- except httpx.TooManyRedirects: logger.error(f"[Web Scrape] Too many redirects fetching {url}")
162
- except httpx.RequestError as e: logger.error(f"[Web Scrape] Request error fetching {url}: {e}")
163
- except Exception as e: logger.error(f"[Web Scrape] Unexpected error fetching {url}: {e}", exc_info=True)
164
- return None
165
 
 
166
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
167
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
168
  if not api_key: logger.error("[Supadata] API key missing."); return None
@@ -250,6 +237,7 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
250
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
251
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
252
  transcript_text = None
 
253
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
254
  try:
255
  transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] )
@@ -261,6 +249,7 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
261
  if "No transcript found" in str(e): logger.warning(f"[Primary YT] No transcript found.")
262
  elif "disabled" in str(e): logger.warning(f"[Primary YT] Transcripts disabled.")
263
  transcript_text = None
 
264
  if transcript_text is None:
265
  logger.info("[Fallback YT 1] Trying Supadata API...")
266
  if SUPADATA_API_KEY:
@@ -268,6 +257,7 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
268
  if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
269
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
270
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
 
271
  if transcript_text is None:
272
  logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...")
273
  if APIFY_API_TOKEN:
@@ -275,58 +265,196 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
275
  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
276
  else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
277
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
 
278
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
279
  return transcript_text
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  async def get_website_content(url: str) -> Optional[str]:
 
282
  if not url: logger.error("get_website_content: No URL"); return None
283
- logger.info(f"[Primary Web] Fetching website content for: {url}")
284
  html_content = await fetch_url_content_for_scrape(url)
285
  if not html_content: return None
286
  try:
287
  def parse_html(content):
288
  soup = BeautifulSoup(content, DEFAULT_PARSER)
289
- for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]): element.extract()
 
 
 
290
  main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
291
  target_element = main_content if main_content else soup.body
292
- if not target_element: logger.warning(f"[Primary Web] Could not find body/main for parsing {url}"); return None
 
293
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
294
  text = " ".join(lines)
295
- if not text: logger.warning(f"[Primary Web] Extracted text empty after clean for {url}"); return None
296
  return text
 
297
  text_content = await asyncio.to_thread(parse_html, html_content)
298
- if text_content: logger.info(f"[Primary Web] Success scrape for {url} (final len: {len(text_content)})"); return text_content
299
  else: return None
300
- except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
301
 
302
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
303
- if not url: logger.error("[Fallback Web API] No URL"); return None
304
- if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
305
- logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
 
306
  api_endpoint = "https://urltotext.com/api/v1/urltotext/"
307
  payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
308
  headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" }
309
  try:
310
  async with httpx.AsyncClient(timeout=45.0) as client:
311
- logger.debug(f"[Fallback Web API] Sending request to urltotext.com API for {url}")
312
  response = await client.post(api_endpoint, headers=headers, json=payload)
313
- logger.debug(f"[Fallback Web API] Received status {response.status_code} from urltotext.com API for {url}")
314
  if response.status_code == 200:
315
  try:
316
  data = response.json()
317
  content = data.get("data", {}).get("content"); credits = data.get("credits_used", "N/A"); warning = data.get("data", {}).get("warning")
318
- if warning: logger.warning(f"[Fallback Web API] urltotext.com API Warning for {url}: {warning}")
319
- if content: logger.info(f"[Fallback Web API] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}"); return content.strip()
320
- else: logger.warning(f"[Fallback Web API] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
321
- except json.JSONDecodeError: logger.error(f"[Fallback Web API] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
322
- except Exception as e: logger.error(f"[Fallback Web API] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
323
- elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[Fallback Web API] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
324
- else: logger.error(f"[Fallback Web API] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
325
- except httpx.TimeoutException: logger.error(f"[Fallback Web API] Timeout connecting to urltotext.com API for {url}"); return None
326
- except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
327
- except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
328
-
329
- # --- Summarization Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
  async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
332
  """Internal function to call Gemini API. Returns (summary, error_message)."""
@@ -412,9 +540,6 @@ async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Opt
412
 
413
  if summary:
414
  logger.info(f"[Gemini Primary] Success generating summary. Output len: {len(summary)}");
415
- # Escape Markdown for Telegram only if necessary (Removed escaping as it might conflict with plain heading)
416
- # escaped_summary = summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
417
- # Let's see if the AI respects the instruction without further escaping. If not, we might need selective escaping.
418
  return summary.strip(), None
419
  else:
420
  finish_reason = response.candidates[0].finish_reason if response.candidates else 'N/A'
@@ -493,8 +618,6 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str],
493
  summary = message.get("content")
494
  if summary:
495
  logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Output len: {len(summary)}")
496
- # Escape Markdown for Telegram only if necessary (Removed escaping)
497
- # escaped_summary = summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
498
  return summary.strip(), None
499
  else:
500
  logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Resp: {data}")
@@ -585,14 +708,23 @@ async def generate_summary(text: str, summary_type: str) -> str:
585
  return "Sorry, an unknown error occurred during summary generation after trying all available models."
586
 
587
 
588
- # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
 
589
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
 
590
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
591
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
592
  try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
593
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
594
- content = None; user_feedback_message = None; success = False; status_message_id = message_id_to_edit; message_to_delete_later_id : Optional[int] = None
 
 
 
 
 
 
595
  try:
 
596
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
597
  if status_message_id:
598
  try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ); logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
@@ -602,62 +734,118 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
602
  status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN )
603
  if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
604
  else: raise RuntimeError("Failed to send status message after retries.")
605
- except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise
 
606
  try:
 
607
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
608
  is_youtube = is_youtube_url(url); logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
 
609
  if is_youtube:
 
610
  video_id = extract_youtube_id(url)
611
- if video_id: content = await get_youtube_transcript(video_id, url)
612
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
613
  if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
614
  else:
 
 
 
 
615
  content = await get_website_content(url)
 
 
616
  if not content:
617
- logger.warning(f"[Task {task_id}] Primary web scrape failed for {url}. Trying fallback API.")
618
- global URLTOTEXT_API_KEY
619
- if URLTOTEXT_API_KEY:
620
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
621
  content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
622
- if not content: user_feedback_message = "Sorry, I couldn't fetch content from that website using either method (blocked/inaccessible/empty?)."
623
- else: user_feedback_message = "Sorry, I couldn't fetch content from that website (blocked/inaccessible/empty?). The fallback method is not configured."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
  if content:
625
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
626
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
627
- final_summary = await generate_summary(content, summary_type) # This now calls Gemini first, then OpenRouter
628
- if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
 
 
 
629
  else:
630
- max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
631
- # Sending potentially unescaped summary directly - assuming AI follows instructions
632
- # ParseMode.MARKDOWN might interpret unintended things, test needed. Consider ParseMode=None if issues arise.
 
633
  await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
634
- for part in summary_parts[1:]: await asyncio.sleep(0.5); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} )
635
- success = True; logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts)."); user_feedback_message = None
636
- elif user_feedback_message: logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}"); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
 
 
 
 
 
 
 
 
 
637
  except Exception as e:
638
- logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True); user_feedback_message = "Oops! Something went really wrong. Please try again later."
 
 
639
  try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
640
  except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
 
641
  except Exception as outer_e:
 
642
  logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
643
  try:
644
- if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred." )
645
  except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
646
  finally:
 
 
647
  delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
648
  if delete_target_id and bot:
649
  try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id); logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
650
  except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
 
651
  if background_request and hasattr(background_request, '_client') and background_request._client:
652
  try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
653
  except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
654
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
655
 
 
 
 
656
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
657
  user = update.effective_user; mention = user.mention_html()
658
  if not user or not update.message: return
659
  logger.info(f"User {user.id} used /start.")
660
- await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" ) # Changed 'summarize'
661
 
662
  async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
663
  user = update.effective_user
@@ -671,41 +859,53 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
671
  if not update.message or not update.message.text: return
672
  url = update.message.text.strip(); user = update.effective_user
673
  if not user: return
674
- if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]: logger.debug(f"Ignoring non-URL from {user.id}: {url}"); return
 
 
 
 
 
 
675
  logger.info(f"User {user.id} sent potential URL: {url}")
676
  context.user_data['url_to_summarize'] = url; context.user_data['original_message_id'] = update.message.message_id
677
  keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
678
  reply_markup = InlineKeyboardMarkup(keyboard)
679
- await update.message.reply_text( f"Okay, I see this link:\n{url}\n\nHow would you like it summarised?", reply_markup=reply_markup, disable_web_page_preview=True ) # Changed 'summarized'
680
 
681
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
682
  query = update.callback_query
683
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
684
  user = query.from_user; summary_type = query.data; query_id = query.id
685
  try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
686
- except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
 
687
  url = context.user_data.get('url_to_summarize'); message_id_to_edit = query.message.message_id
688
  logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
 
689
  if not url:
690
- logger.warning(f"No URL in context for user {user.id} (cb {query_id}).")
691
- try: await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
 
 
 
 
692
  except Exception as e:
693
  logger.error(f"Failed edit 'URL not found' msg: {e}")
694
- try: await context.bot.send_message(chat_id=user.id, text="Sorry, context lost. Send link again.")
695
- except Exception: pass
696
  return
697
 
 
698
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
699
 
700
- global TELEGRAM_TOKEN, GEMINI_API_KEY, OPENROUTER_API_KEY, _gemini_primary_enabled, _openrouter_fallback_enabled
701
  if not TELEGRAM_TOKEN:
702
- logger.critical("TG TOKEN missing!")
703
- try: await query.edit_message_text(text="❌ Bot config error.")
704
  except Exception: pass
705
  return
706
  if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
707
- logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid!")
708
- try: await query.edit_message_text(text="❌ AI config error: No models available.")
709
  except Exception: pass
710
  return
711
  elif not _gemini_primary_enabled:
@@ -715,22 +915,62 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
715
  logger.warning("Fallback AI (OpenRouter) is unavailable.")
716
  # No need to inform user unless primary fails later
717
 
718
- logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
719
- asyncio.create_task( process_summary_task( user_id=user.id, chat_id=query.message.chat_id, message_id_to_edit=message_id_to_edit, url=url, summary_type=summary_type, bot_token=TELEGRAM_TOKEN ), name=f"SummaryTask-{user.id}-{message_id_to_edit}" )
 
 
 
 
 
 
 
 
 
 
 
720
 
721
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
722
- ignore_errors = (AttributeError, )
723
- if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
  logger.error("Exception while handling an update:", exc_info=context.error)
 
 
 
 
 
 
 
725
 
726
  async def setup_bot_config() -> Application:
727
  logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
728
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
729
  custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
730
  application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
731
- application.add_handler(CommandHandler("start", start)); application.add_handler(CommandHandler("help", help_command))
732
- application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url)); application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
733
- application.add_error_handler(error_handler); logger.info("Telegram application handlers configured."); return application
 
 
 
 
 
 
 
734
 
735
  @contextlib.asynccontextmanager
736
  async def lifespan(app: Starlette):
@@ -745,69 +985,149 @@ async def lifespan(app: Starlette):
745
  try:
746
  if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
747
  else: logger.warning("Failed delete webhook (API returned False).")
748
- except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
749
- space_host = os.environ.get("SPACE_HOST"); webhook_path = "/webhook"; full_webhook_url = None
 
 
 
 
750
  if space_host:
751
- protocol = "https"; host = space_host.split('://')[-1]; full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
 
 
 
752
  if full_webhook_url:
753
- logger.info(f"Setting webhook: {full_webhook_url}"); set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
754
- if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
755
- await asyncio.sleep(1.0)
 
 
 
 
 
 
 
 
756
  try:
757
- await ptb_app.bot.set_webhook(**set_webhook_args); webhook_info = await ptb_app.bot.get_webhook_info()
758
- if webhook_info.url == full_webhook_url: logger.info(f"Webhook set: URL='{webhook_info.url}', Secret={bool(WEBHOOK_SECRET)}")
759
- else: logger.error(f"Webhook URL mismatch! Expected '{full_webhook_url}', Got '{webhook_info.url}'"); raise RuntimeError("Webhook URL mismatch.")
760
- await ptb_app.start(); logger.info("PTB Application started (webhook mode).")
761
- except Exception as e: logger.error(f"FATAL: Failed set webhook: {e}", exc_info=True); raise RuntimeError(f"Failed set webhook: {e}") from e
762
- else: logger.critical("Could not construct webhook URL."); raise RuntimeError("Webhook URL undetermined.")
763
- else: logger.critical("SPACE_HOST missing."); raise RuntimeError("SPACE_HOST env var missing.")
764
- logger.info("ASGI Lifespan: Startup complete."); yield
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
  except Exception as startup_err:
766
  logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
767
  if ptb_app:
768
  if ptb_app.running: await ptb_app.stop()
769
  await ptb_app.shutdown()
770
- raise
771
  finally:
 
772
  logger.info("ASGI Lifespan: Shutdown initiated...")
773
  if ptb_app:
774
- if ptb_app.running: logger.info("Stopping PTB..."); await ptb_app.stop()
775
- logger.info("Shutting down PTB..."); await ptb_app.shutdown(); logger.info("PTB Application shut down.")
776
- else: logger.info("PTB application not initialized or failed.")
 
 
 
 
 
 
 
777
  logger.info("ASGI Lifespan: Shutdown complete.")
778
 
779
  async def health_check(request: Request) -> PlainTextResponse:
 
780
  global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled
781
  bot_status = "Not Initialized"
782
  if ptb_app and ptb_app.bot:
783
  try:
784
- if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
785
- else: bot_status = "Initialized/Not running"
786
- except Exception as e: bot_status = f"Error checking status: {e}"
787
- return PlainTextResponse( f"TG Bot Summariser - Status: {bot_status}\n" # Changed 'Summarizer'
788
- f"Primary Model: {GEMINI_MODEL if _gemini_primary_enabled else 'N/A (Disabled)'}\n"
789
- f"Fallback Model: {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'N/A (Disabled)'}\n"
790
- f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}" )
 
 
 
 
 
 
 
 
 
791
 
792
  async def telegram_webhook(request: Request) -> Response:
 
793
  global WEBHOOK_SECRET
794
- if not ptb_app: logger.error("Webhook recv but PTB not initialized."); return PlainTextResponse('Bot not initialized', status_code=503)
795
- if not ptb_app.running: logger.warning("Webhook recv but PTB not running."); return PlainTextResponse('Bot not running', status_code=503)
 
 
 
 
 
 
 
 
 
796
  try:
797
- if WEBHOOK_SECRET:
798
- token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
799
- if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook invalid secret. Header: '{token_header}'"); return Response(content="Invalid secret token", status_code=403)
800
- update_data = await request.json(); update = Update.de_json(data=update_data, bot=ptb_app.bot); logger.debug(f"Processing update_id: {update.update_id} via webhook")
801
- await ptb_app.process_update(update); return Response(status_code=200) # OK
802
- except json.JSONDecodeError: logger.error("Webhook invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
803
- except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
804
-
805
- app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
806
  logger.info("Starlette ASGI application created with native routes.")
807
 
 
808
  if __name__ == '__main__':
809
  import uvicorn
810
- logger.warning("Running in development mode using Uvicorn directly")
 
811
  log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
812
- local_port = int(os.environ.get('PORT', 8080))
813
- uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)
 
 
 
 
 
 
 
1
+ # main.py (Corrected SyntaxError at line 424 - Now with Gemini 2.0 as primary AND new scraping fallbacks)
2
  import os
3
  import re
4
  import logging
 
7
  import html
8
  import contextlib
9
  import traceback
10
+ import urllib.parse # Added for URL encoding
11
  from typing import Optional, Dict, Any, Tuple
12
 
13
  # --- Frameworks ---
 
82
 
83
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
84
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Now Fallback
85
+ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # Scrape Fallback 1
86
+ SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY') # YT Fallback 1
87
+ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # YT Fallback 2
88
+ RAPIDAPI_KEY = get_secret('RAPIDAPI_KEY') # Scrape Fallbacks 2 & 3 (NEW)
89
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
90
+ GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Now Primary Summarizer
91
 
92
  # Models (User can still configure via env vars)
93
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
 
97
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
98
  if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.")
99
  if not OPENROUTER_API_KEY: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback summarization will fail.")
100
+ if not RAPIDAPI_KEY: logger.warning("⚠️ WARNING: RAPIDAPI_KEY not found. RapidAPI scraping fallbacks will be unavailable.") # New check
101
 
102
  _gemini_primary_enabled = _gemini_available and bool(GEMINI_API_KEY)
103
  if not _gemini_available: logger.warning("⚠️ WARNING: google-generativeai library missing. Gemini disabled.")
 
107
  if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback disabled.")
108
 
109
 
110
+ if not URLTOTEXT_API_KEY: logger.warning("Optional secret 'URLTOTEXT_API_KEY' not found. First web scraping fallback unavailable.") # Adjusted warning
111
+ if not SUPADATA_API_KEY: logger.warning("Optional secret 'SUPADATA_API_KEY' not found. First YT transcript fallback unavailable.") # Adjusted warning
112
+ if not APIFY_API_TOKEN: logger.warning("Optional secret 'APIFY_API_TOKEN' not found. Second YT transcript fallback unavailable.") # Adjusted warning
113
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
114
 
115
  logger.info("Secret loading and configuration check finished.")
 
117
  logger.info(f"Using OpenRouter Model (Fallback): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}")
118
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
119
  _apify_token_exists = bool(APIFY_API_TOKEN)
120
+ _urltotext_key_exists = bool(URLTOTEXT_API_KEY)
121
+ _rapidapi_key_exists = bool(RAPIDAPI_KEY)
122
 
123
 
124
  if _gemini_primary_enabled:
 
148
 
149
 
150
  # --- Content Fetching Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
+ # --- YouTube Transcript Fetching (Unchanged) ---
153
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
154
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
155
  if not api_key: logger.error("[Supadata] API key missing."); return None
 
237
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
238
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
239
  transcript_text = None
240
+ # Method 1: youtube-transcript-api (Primary)
241
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
242
  try:
243
  transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] )
 
249
  if "No transcript found" in str(e): logger.warning(f"[Primary YT] No transcript found.")
250
  elif "disabled" in str(e): logger.warning(f"[Primary YT] Transcripts disabled.")
251
  transcript_text = None
252
+ # Method 2: Supadata (Fallback 1)
253
  if transcript_text is None:
254
  logger.info("[Fallback YT 1] Trying Supadata API...")
255
  if SUPADATA_API_KEY:
 
257
  if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
258
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
259
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
260
+ # Method 3: Apify (Fallback 2)
261
  if transcript_text is None:
262
  logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...")
263
  if APIFY_API_TOKEN:
 
265
  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
266
  else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
267
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
268
+ # Final Result
269
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
270
  return transcript_text
271
 
272
+ # --- Website Content Fetching (MODIFIED SECTION) ---
273
+
274
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
    """Fetch raw HTML for *url* with httpx (primary web method, fetch step).

    Follows redirects, sends browser-like headers, and returns the decoded
    response body only when the server reports an HTML content type.
    Every network/HTTP failure is logged and collapsed into ``None``.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=browser_headers) as session:
            logger.debug(f"[Web Scrape Direct] Sending request to {url}")
            response = await session.get(url)
            logger.debug(f"[Web Scrape Direct] Received response {response.status_code} from {url}")
            response.raise_for_status()
            content_type = response.headers.get('content-type', '').lower()
            # Only HTML is worth handing to the parser downstream.
            if 'html' not in content_type:
                logger.warning(f"[Web Scrape Direct] Non-HTML content type from {url}: {content_type}")
                return None
            try:
                return response.text
            except Exception as exc:
                logger.error(f"[Web Scrape Direct] Error decoding response for {url}: {exc}")
                return None
    except httpx.HTTPStatusError as exc:
        logger.error(f"[Web Scrape Direct] HTTP error {exc.response.status_code} fetching {url}: {exc}")
    except httpx.TimeoutException:
        logger.error(f"[Web Scrape Direct] Timeout error fetching {url}")
    except httpx.TooManyRedirects:
        logger.error(f"[Web Scrape Direct] Too many redirects fetching {url}")
    except httpx.RequestError as exc:
        logger.error(f"[Web Scrape Direct] Request error fetching {url}: {exc}")
    except Exception as exc:
        logger.error(f"[Web Scrape Direct] Unexpected error fetching {url}: {exc}", exc_info=True)
    return None
293
+
294
async def get_website_content(url: str) -> Optional[str]:
    """Primary website method: direct fetch + BeautifulSoup text extraction.

    Downloads the page via fetch_url_content_for_scrape(), strips obvious
    non-content tags, prefers a main/article/content container over the whole
    body, and returns the whitespace-normalised text (or None on any failure).
    """
    if not url:
        logger.error("get_website_content: No URL")
        return None
    logger.info(f"[Web Scrape Primary] Fetching website content for: {url}")
    raw_html = await fetch_url_content_for_scrape(url)
    if not raw_html:
        return None

    # Tags that never carry readable article text.
    noise_tags = ["script", "style", "header", "footer", "nav", "aside", "form",
                  "button", "input", "iframe", "img", "svg", "link", "meta",
                  "noscript", "figure"]

    def _extract_text(markup):
        soup = BeautifulSoup(markup, DEFAULT_PARSER)
        for node in soup(noise_tags):
            node.extract()
        # Prefer an explicit main-content container; fall back to <body>.
        container = (soup.find('main') or soup.find('article')
                     or soup.find(id='content') or soup.find(class_='content')
                     or soup.find(id='main-content') or soup.find(class_='main-content')
                     or soup.find(role='main'))
        root = container if container else soup.body
        if not root:
            logger.warning(f"[Web Scrape Primary] Could not find body/main for parsing {url}")
            return None
        # Collapse the extracted text into a single space-joined string.
        stripped = [ln.strip() for ln in root.get_text(separator='\n', strip=True).splitlines() if ln.strip()]
        joined = " ".join(stripped)
        if not joined:
            logger.warning(f"[Web Scrape Primary] Extracted text empty after clean for {url}")
            return None
        return joined

    try:
        # BeautifulSoup parsing is CPU-bound; keep it off the event loop.
        result = await asyncio.to_thread(_extract_text, raw_html)
        if result:
            logger.info(f"[Web Scrape Primary] Success scrape for {url} (final len: {len(result)})")
            return result
        return None
    except Exception as exc:
        logger.error(f"[Web Scrape Primary] Error scraping/parsing {url}: {exc}", exc_info=True)
        return None
320
 
321
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
    """Fallback 1: extract website text through the urltotext.com REST API.

    POSTs the target URL (with JS rendering enabled) and returns the stripped
    main-content text, or None on any API/network/parse failure.
    """
    if not url:
        logger.error("[Web Scrape Fallback 1] No URL")
        return None
    if not api_key:
        logger.error("[Web Scrape Fallback 1] urltotext.com API key missing.")
        return None
    logger.info(f"[Web Scrape Fallback 1] Attempting fetch for: {url} using urltotext.com API")
    endpoint = "https://urltotext.com/api/v1/urltotext/"
    request_headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
    request_body = {
        "url": url,
        "output_format": "text",
        "extract_main_content": True,
        "render_javascript": True,
        "residential_proxy": False,
    }
    try:
        async with httpx.AsyncClient(timeout=45.0) as session:
            logger.debug(f"[Web Scrape Fallback 1] Sending request to urltotext.com API for {url}")
            response = await session.post(endpoint, headers=request_headers, json=request_body)
            logger.debug(f"[Web Scrape Fallback 1] Received status {response.status_code} from urltotext.com API for {url}")
            if response.status_code == 200:
                try:
                    data = response.json()
                    text = data.get("data", {}).get("content")
                    credits = data.get("credits_used", "N/A")
                    api_warning = data.get("data", {}).get("warning")
                    if api_warning:
                        logger.warning(f"[Web Scrape Fallback 1] urltotext.com API Warning for {url}: {api_warning}")
                    if text and isinstance(text, str):
                        logger.info(f"[Web Scrape Fallback 1] Success via urltotext.com API for {url}. Len: {len(text)}. Credits: {credits}")
                        return text.strip()
                    logger.warning(f"[Web Scrape Fallback 1] urltotext.com API success but content empty for {url}. Resp: {data}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[Web Scrape Fallback 1] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}")
                    return None
                except Exception as exc:
                    logger.error(f"[Web Scrape Fallback 1] Error processing urltotext.com success response for {url}: {exc}", exc_info=True)
                    return None
            # Documented client/server error statuses vs. anything else.
            if response.status_code in (400, 401, 402, 403, 422, 500):
                logger.error(f"[Web Scrape Fallback 1] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
                return None
            logger.error(f"[Web Scrape Fallback 1] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
            return None
    except httpx.TimeoutException:
        logger.error(f"[Web Scrape Fallback 1] Timeout connecting to urltotext.com API for {url}")
        return None
    except httpx.RequestError as exc:
        logger.error(f"[Web Scrape Fallback 1] Request error connecting to urltotext.com API for {url}: {exc}")
        return None
    except Exception as exc:
        logger.error(f"[Web Scrape Fallback 1] Unexpected error during urltotext.com API call for {url}: {exc}", exc_info=True)
        return None
348
+
349
+ # --- NEW Fallback Functions ---
350
+
351
async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Optional[str]:
    """Fallback 2 (NEW): website text via the Scraper's Proxy Parser on RapidAPI.

    Sends a GET to the hosted parser with auto-detection enabled and combines
    the returned title and content into a single string. Returns None on any
    API/network/parse failure.
    """
    if not url:
        logger.error("[Web Scrape Fallback 2] No URL provided")
        return None
    if not api_key:
        logger.error("[Web Scrape Fallback 2] RapidAPI key missing.")
        return None
    logger.info(f"[Web Scrape Fallback 2] Attempting fetch for: {url} using Scraper's Proxy Parser API")

    api_host = "scrapers-proxy2.p.rapidapi.com"
    rapidapi_headers = {"x-rapidapi-host": api_host, "x-rapidapi-key": api_key}
    # The target URL rides in a query parameter, so it must be fully encoded.
    target = urllib.parse.quote(url, safe='')
    endpoint = f"https://{api_host}/parser?url={target}&auto_detect=true"
    try:
        async with httpx.AsyncClient(timeout=40.0) as session:
            logger.debug(f"[Web Scrape Fallback 2] Sending GET request to {api_host} for {url}")
            response = await session.get(endpoint, headers=rapidapi_headers)
            logger.debug(f"[Web Scrape Fallback 2] Received status {response.status_code} from {api_host} for {url}")

            status = response.status_code
            if status == 200:
                try:
                    payload = response.json()
                    # Combine title + content when both are present.
                    page_title = payload.get("title")
                    page_body = payload.get("content")
                    combined = ""
                    if page_title and isinstance(page_title, str):
                        combined += page_title.strip() + ". "
                    if page_body and isinstance(page_body, str):
                        combined += page_body.strip()
                    if combined:
                        logger.info(f"[Web Scrape Fallback 2] Success via Scraper's Proxy Parser API for {url}. Len: {len(combined)}")
                        return combined
                    logger.warning(f"[Web Scrape Fallback 2] Scraper's Proxy API success but content/title empty/invalid for {url}. Response keys: {list(payload.keys())}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[Web Scrape Fallback 2] Failed JSON decode Scraper's Proxy API for {url}. Status:{response.status_code}. Resp:{response.text[:500]}")
                    return None
                except Exception as exc:
                    logger.error(f"[Web Scrape Fallback 2] Error processing Scraper's Proxy API success response for {url}: {exc}", exc_info=True)
                    return None
            # Non-200: map the well-known RapidAPI statuses to specific logs.
            if status == 401:
                logger.error(f"[Web Scrape Fallback 2] Auth error (401) with {api_host}. Check RapidAPI key.")
            elif status == 403:
                logger.error(f"[Web Scrape Fallback 2] Forbidden (403) from {api_host}. Check API subscription/limits.")
            elif status == 429:
                logger.warning(f"[Web Scrape Fallback 2] Rate Limit (429) from {api_host}.")
            elif status >= 500:
                logger.error(f"[Web Scrape Fallback 2] Server error ({status}) from {api_host}. Resp:{response.text[:200]}")
            else:
                logger.error(f"[Web Scrape Fallback 2] Unexpected status {status} from {api_host} API for {url}. Resp:{response.text[:200]}")
            return None
    except httpx.TimeoutException:
        logger.error(f"[Web Scrape Fallback 2] Timeout connecting to {api_host} API for {url}")
        return None
    except httpx.RequestError as exc:
        logger.error(f"[Web Scrape Fallback 2] Request error connecting to {api_host} API for {url}: {exc}")
        return None
    except Exception as exc:
        logger.error(f"[Web Scrape Fallback 2] Unexpected error during {api_host} API call for {url}: {exc}", exc_info=True)
        return None
401
+
402
async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Optional[str]:
    """Fallback 3 (NEW): website text via the AI Web Scraper API on RapidAPI.

    POSTs the target URL as form data and probes a few likely response keys
    for the extracted text. Returns None on any error or empty result.
    """
    if not url:
        logger.error("[Web Scrape Fallback 3] No URL provided")
        return None
    if not api_key:
        logger.error("[Web Scrape Fallback 3] RapidAPI key missing.")
        return None
    logger.info(f"[Web Scrape Fallback 3] Attempting fetch for: {url} using AI Web Scraper API")

    api_host = "ai-web-scraper.p.rapidapi.com"
    endpoint = f"https://{api_host}/extract_content/v1"
    request_headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'x-rapidapi-host': api_host,
        'x-rapidapi-key': api_key,
    }
    form_fields = {'url': url}  # httpx form-encodes dicts passed via `data=`

    try:
        async with httpx.AsyncClient(timeout=45.0) as session:
            logger.debug(f"[Web Scrape Fallback 3] Sending POST request to {api_host} for {url}")
            response = await session.post(endpoint, headers=request_headers, data=form_fields)
            logger.debug(f"[Web Scrape Fallback 3] Received status {response.status_code} from {api_host} for {url}")

            status = response.status_code
            if status == 200:
                try:
                    parsed = response.json()
                    # Response schema is undocumented; probe common keys in order.
                    extracted = None
                    if isinstance(parsed, dict):
                        for key in ("content", "text", "extracted_text", "result"):
                            candidate = parsed.get(key)
                            if candidate:
                                extracted = candidate
                                break
                    elif isinstance(parsed, str):
                        # Some endpoints return the text directly as a string.
                        extracted = parsed
                    if extracted and isinstance(extracted, str):
                        logger.info(f"[Web Scrape Fallback 3] Success via AI Web Scraper API for {url}. Len: {len(extracted)}")
                        return extracted.strip()
                    logger.warning(f"[Web Scrape Fallback 3] AI Web Scraper API success but content empty/invalid format for {url}. Response type: {type(parsed)}, Keys: {list(parsed.keys()) if isinstance(parsed, dict) else 'N/A'}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[Web Scrape Fallback 3] Failed JSON decode AI Web Scraper API for {url}. Status:{response.status_code}. Resp:{response.text[:500]}")
                    return None
                except Exception as exc:
                    logger.error(f"[Web Scrape Fallback 3] Error processing AI Web Scraper API success response for {url}: {exc}", exc_info=True)
                    return None
            # Non-200: map the well-known RapidAPI statuses to specific logs.
            if status == 401:
                logger.error(f"[Web Scrape Fallback 3] Auth error (401) with {api_host}. Check RapidAPI key.")
            elif status == 403:
                logger.error(f"[Web Scrape Fallback 3] Forbidden (403) from {api_host}. Check API subscription/limits.")
            elif status == 429:
                logger.warning(f"[Web Scrape Fallback 3] Rate Limit (429) from {api_host}.")
            elif status >= 500:
                logger.error(f"[Web Scrape Fallback 3] Server error ({status}) from {api_host}. Resp:{response.text[:200]}")
            else:
                logger.error(f"[Web Scrape Fallback 3] Unexpected status {status} from {api_host} API for {url}. Resp:{response.text[:200]}")
            return None
    except httpx.TimeoutException:
        logger.error(f"[Web Scrape Fallback 3] Timeout connecting to {api_host} API for {url}")
        return None
    except httpx.RequestError as exc:
        logger.error(f"[Web Scrape Fallback 3] Request error connecting to {api_host} API for {url}: {exc}")
        return None
    except Exception as exc:
        logger.error(f"[Web Scrape Fallback 3] Unexpected error during {api_host} API call for {url}: {exc}", exc_info=True)
        return None
456
+
457
+ # --- Summarization Functions (Unchanged) ---
458
 
459
  async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
460
  """Internal function to call Gemini API. Returns (summary, error_message)."""
 
540
 
541
  if summary:
542
  logger.info(f"[Gemini Primary] Success generating summary. Output len: {len(summary)}");
 
 
 
543
  return summary.strip(), None
544
  else:
545
  finish_reason = response.candidates[0].finish_reason if response.candidates else 'N/A'
 
618
  summary = message.get("content")
619
  if summary:
620
  logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Output len: {len(summary)}")
 
 
621
  return summary.strip(), None
622
  else:
623
  logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Resp: {data}")
 
708
  return "Sorry, an unknown error occurred during summary generation after trying all available models."
709
 
710
 
711
+ # --- Main Processing Logic (MODIFIED) ---
712
+
713
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
714
+ """Handles the entire process: fetching content (with fallbacks) and summarizing."""
715
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
716
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
717
  try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
718
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
719
+
720
+ content: Optional[str] = None
721
+ user_feedback_message: Optional[str] = None
722
+ success: bool = False
723
+ status_message_id: Optional[int] = message_id_to_edit
724
+ message_to_delete_later_id : Optional[int] = None
725
+
726
  try:
727
+ # --- 1. Initial User Feedback ---
728
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
729
  if status_message_id:
730
  try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ); logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
 
734
  status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN )
735
  if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
736
  else: raise RuntimeError("Failed to send status message after retries.")
737
+ except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise # Don't proceed if we can't communicate
738
+
739
  try:
740
+ # --- 2. Content Fetching ---
741
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
742
  is_youtube = is_youtube_url(url); logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
743
+
744
  if is_youtube:
745
+ # --- YouTube Transcript Logic (Unchanged) ---
746
  video_id = extract_youtube_id(url)
747
+ if video_id: content = await get_youtube_transcript(video_id, url) # Tries lib -> Supadata -> Apify
748
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
749
  if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
750
  else:
751
+ # --- Website Scraping Logic (with New Fallbacks) ---
752
+ global URLTOTEXT_API_KEY, RAPIDAPI_KEY, _urltotext_key_exists, _rapidapi_key_exists
753
+
754
+ # Method 1: Primary Scrape (Direct Fetch + BS4)
755
  content = await get_website_content(url)
756
+
757
+ # Method 2: Fallback 1 (urltotext.com)
758
  if not content:
759
+ logger.warning(f"[Task {task_id}] Primary web scrape failed for {url}. Trying Fallback 1 (urltotext.com).")
760
+ if _urltotext_key_exists:
 
761
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
762
  content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
763
+ if not content: logger.warning(f"[Task {task_id}] Fallback 1 (urltotext.com) also failed.")
764
+ else: logger.warning("[Task {task_id}] Fallback 1 (urltotext.com) API key unavailable. Skipping.")
765
+
766
+ # Method 3: Fallback 2 (Scraper's Proxy via RapidAPI - NEW)
767
+ if not content:
768
+ logger.warning(f"[Task {task_id}] Fallbacks 1 failed. Trying Fallback 2 (Scraper's Proxy).")
769
+ if _rapidapi_key_exists:
770
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
771
+ content = await get_website_content_via_scrapers_proxy(url, RAPIDAPI_KEY)
772
+ if not content: logger.warning(f"[Task {task_id}] Fallback 2 (Scraper's Proxy) also failed.")
773
+ else: logger.warning("[Task {task_id}] Fallback 2 (Scraper's Proxy) RapidAPI key unavailable. Skipping.")
774
+
775
+ # Method 4: Fallback 3 (AI Web Scraper via RapidAPI - NEW)
776
+ if not content:
777
+ logger.warning(f"[Task {task_id}] Fallbacks 2 failed. Trying Fallback 3 (AI Web Scraper).")
778
+ if _rapidapi_key_exists:
779
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
780
+ content = await get_website_content_via_ai_web_scraper(url, RAPIDAPI_KEY)
781
+ if not content: logger.warning(f"[Task {task_id}] Fallback 3 (AI Web Scraper) also failed.")
782
+ else: logger.warning("[Task {task_id}] Fallback 3 (AI Web Scraper) RapidAPI key unavailable. Skipping.")
783
+
784
+ # Final check for website content
785
+ if not content and not user_feedback_message:
786
+ user_feedback_message = "Sorry, I couldn't fetch content from that website using any available method (blocked/inaccessible/empty?)."
787
+
788
+ # --- 3. Summarization ---
789
  if content:
790
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
791
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
792
+ final_summary = await generate_summary(content, summary_type) # Calls Gemini -> OpenRouter
793
+
794
+ if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
795
+ user_feedback_message = final_summary # Use the error message from summarizer
796
+ logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
797
  else:
798
+ # Split long messages if needed
799
+ max_length = 4096
800
+ summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
801
+ # Send summary parts (using ParseMode=None as requested by AI prompt instructions)
802
  await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
803
+ for part in summary_parts[1:]:
804
+ await asyncio.sleep(0.5) # Small delay between parts
805
+ await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} )
806
+ success = True
807
+ logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
808
+ user_feedback_message = None # Clear any previous error message if summarization succeeded
809
+
810
+ # --- 4. Handle Final Failure Feedback ---
811
+ if user_feedback_message: # If any step failed and set a message
812
+ logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}")
813
+ await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
814
+
815
  except Exception as e:
816
+ # Catch unexpected errors during the inner try block (fetching/summarizing)
817
+ logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
818
+ user_feedback_message = "Oops! Something went really wrong during processing. Please try again later."
819
  try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
820
  except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
821
+
822
  except Exception as outer_e:
823
+ # Catch errors in the outer setup (bot creation, status message sending)
824
  logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
825
  try:
826
+ if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred. Could not start processing." )
827
  except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
828
  finally:
829
+ # --- 5. Cleanup ---
830
+ # Delete the "Processing..." or original button message
831
  delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
832
  if delete_target_id and bot:
833
  try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id); logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
834
  except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
835
+ # Close the background bot's HTTP client
836
  if background_request and hasattr(background_request, '_client') and background_request._client:
837
  try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
838
  except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
839
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
840
 
841
+
842
+ # --- Telegram Handlers (Unchanged) ---
843
+
844
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /start: greet the user with a short usage hint.

    Bug fix: the original called user.mention_html() *before* the
    `if not user` guard, so an update without an effective_user (e.g. a
    channel post) raised AttributeError. Guard first, then build the mention.
    """
    user = update.effective_user
    if not user or not update.message:
        return
    mention = user.mention_html()
    logger.info(f"User {user.id} used /start.")
    await update.message.reply_html(
        f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!"
    )
849
 
850
  async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
851
  user = update.effective_user
 
859
  if not update.message or not update.message.text: return
860
  url = update.message.text.strip(); user = update.effective_user
861
  if not user: return
862
+ # Basic URL check
863
+ if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
864
+ logger.debug(f"Ignoring non-URL message from {user.id}")
865
+ # Optionally, send a message if you want to guide the user
866
+ # await update.message.reply_text("Please send a valid URL starting with http:// or https://")
867
+ return
868
+
869
  logger.info(f"User {user.id} sent potential URL: {url}")
870
  context.user_data['url_to_summarize'] = url; context.user_data['original_message_id'] = update.message.message_id
871
  keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
872
  reply_markup = InlineKeyboardMarkup(keyboard)
873
+ await update.message.reply_text( f"Okay, I see this link:\n`{url}`\n\nHow would you like it summarised?", reply_markup=reply_markup, disable_web_page_preview=True, parse_mode=ParseMode.MARKDOWN ) # Changed 'summarized', added Markdown
874
 
875
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
876
  query = update.callback_query
877
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
878
  user = query.from_user; summary_type = query.data; query_id = query.id
879
  try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
880
+ except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True) # Log ack errors
881
+
882
  url = context.user_data.get('url_to_summarize'); message_id_to_edit = query.message.message_id
883
  logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
884
+
885
  if not url:
886
+ logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Might be an old button.")
887
+ try:
888
+ await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request (it might be too old). Please send the link again.")
889
+ except BadRequest as e:
890
+ if "message is not modified" in str(e).lower(): pass # Ignore if already edited
891
+ else: logger.error(f"Failed edit 'URL not found' msg: {e}")
892
  except Exception as e:
893
  logger.error(f"Failed edit 'URL not found' msg: {e}")
894
+ # Don't send a new message here, as it might confuse the user if they didn't interact recently
 
895
  return
896
 
897
+ # Clear context *after* checking it's valid for this interaction
898
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
899
 
900
+ global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
901
  if not TELEGRAM_TOKEN:
902
+ logger.critical("TG TOKEN missing in callback!")
903
+ try: await query.edit_message_text(text="❌ Bot configuration error. Cannot proceed.")
904
  except Exception: pass
905
  return
906
  if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
907
+ logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid in callback!")
908
+ try: await query.edit_message_text(text="❌ AI configuration error: No summarization models available.")
909
  except Exception: pass
910
  return
911
  elif not _gemini_primary_enabled:
 
915
  logger.warning("Fallback AI (OpenRouter) is unavailable.")
916
  # No need to inform user unless primary fails later
917
 
918
+ logger.info(f"Scheduling background task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
919
+ # Schedule the background task
920
+ asyncio.create_task(
921
+ process_summary_task(
922
+ user_id=user.id,
923
+ chat_id=query.message.chat_id,
924
+ message_id_to_edit=message_id_to_edit,
925
+ url=url,
926
+ summary_type=summary_type,
927
+ bot_token=TELEGRAM_TOKEN
928
+ ),
929
+ name=f"SummaryTask-{user.id}-{message_id_to_edit}"
930
+ )
931
 
932
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Log Errors caused by Updates."""
    err = context.error
    # Errors that are usually transient or already handled at the call site.
    recoverable = (AttributeError, BadRequest, TimedOut, NetworkError, RetryAfter)

    if not isinstance(err, recoverable):
        # Anything unexpected gets a full traceback.
        logger.error("Exception while handling an update:", exc_info=err)
        # Optionally, notify the user about unexpected errors, but be careful not to spam
        # if update and isinstance(update, Update) and update.effective_chat:
        #     try: await context.bot.send_message(chat_id=update.effective_chat.id, text="An unexpected error occurred.")
        #     except Exception: logger.error("Failed to send error message to user.")
        return

    known_bad_request = isinstance(err, BadRequest) and any(
        marker in str(err).lower()
        for marker in ("message is not modified", "query is too old", "message to edit not found")
    )
    if known_bad_request:
        # Harmless Telegram API complaints (e.g. editing an unchanged message).
        logger.debug(f"Ignoring known BadRequest in error_handler: {err}")
    elif isinstance(err, AttributeError) and "object has no attribute" in str(err):
        logger.debug(f"Ignoring handled AttributeError in error_handler: {err}")
    else:
        # Other network/timeout errors are logged as warnings, not errors.
        logger.warning(f"Handled networking/API error in error_handler: {err}")
955
+
956
+
957
+ # --- Application Setup & Web Framework (Unchanged) ---
958
 
959
async def setup_bot_config() -> Application:
    """Build the PTB Application: HTTP timeouts, handlers, error handler.

    Returns the configured (but not yet initialized/started) Application.
    Raises ValueError if TELEGRAM_TOKEN is not configured.
    """
    logger.info("Configuring Telegram Application...")
    global TELEGRAM_TOKEN
    if not TELEGRAM_TOKEN:
        raise ValueError("TELEGRAM_TOKEN missing.")
    custom_request = HTTPXRequest(connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0)
    application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
    # Command Handlers
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    # Message Handler (URLs). FIX: '&' binds tighter than '|' in Python, so the
    # previous expression parsed as (TEXT & ~COMMAND & Entity("url")) | Entity("text_link"),
    # letting text_link messages (including commands) bypass the TEXT/~COMMAND filters.
    # Group the entity alternatives explicitly.
    url_filter = filters.TEXT & ~filters.COMMAND & (filters.Entity("url") | filters.Entity("text_link"))
    application.add_handler(MessageHandler(url_filter, handle_potential_url))
    # Callback Query Handler (Buttons)
    application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    # Error Handler
    application.add_error_handler(error_handler)
    logger.info("Telegram application handlers configured.")
    return application
974
 
975
  @contextlib.asynccontextmanager
976
  async def lifespan(app: Starlette):
 
985
  try:
986
  if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
987
  else: logger.warning("Failed delete webhook (API returned False).")
988
+ except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1) # Short delay before setting new one
989
+
990
+ # Setup Webhook using SPACE_HOST env var from Hugging Face
991
+ space_host = os.environ.get("SPACE_HOST")
992
+ webhook_path = "/webhook" # Must match the route below
993
+ full_webhook_url = None
994
  if space_host:
995
+ protocol = "https" # HF Spaces use HTTPS
996
+ host = space_host.split('://')[-1] # Get the host part
997
+ full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
998
+
999
  if full_webhook_url:
1000
+ logger.info(f"Setting webhook to: {full_webhook_url}")
1001
+ set_webhook_args = {
1002
+ "url": full_webhook_url,
1003
+ "allowed_updates": Update.ALL_TYPES, # Or specify types like [Update.MESSAGE, Update.CALLBACK_QUERY]
1004
+ "drop_pending_updates": True
1005
+ }
1006
+ if WEBHOOK_SECRET:
1007
+ set_webhook_args["secret_token"] = WEBHOOK_SECRET
1008
+ logger.info("Webhook secret token is configured.")
1009
+
1010
+ await asyncio.sleep(1.0) # Give Telegram servers a moment after potential delete
1011
  try:
1012
+ await ptb_app.bot.set_webhook(**set_webhook_args)
1013
+ webhook_info = await ptb_app.bot.get_webhook_info() # Verify
1014
+ if webhook_info.url == full_webhook_url:
1015
+ logger.info(f"Webhook successfully set: URL='{webhook_info.url}', Secret={bool(WEBHOOK_SECRET)}")
1016
+ else:
1017
+ logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got '{webhook_info.url}'. Check SPACE_HOST and path.")
1018
+ raise RuntimeError("Webhook URL mismatch after setting.")
1019
+
1020
+ await ptb_app.start() # Start the PTB application polling (for webhook updates)
1021
+ logger.info("PTB Application started in webhook mode.")
1022
+
1023
+ except Exception as e:
1024
+ logger.error(f"FATAL: Failed to set webhook: {e}", exc_info=True)
1025
+ raise RuntimeError(f"Failed to set webhook: {e}") from e
1026
+ else:
1027
+ logger.critical("Could not construct webhook URL from SPACE_HOST.")
1028
+ raise RuntimeError("Webhook URL could not be determined.")
1029
+ else:
1030
+ logger.critical("SPACE_HOST environment variable not found. Cannot set webhook automatically.")
1031
+ raise RuntimeError("SPACE_HOST environment variable is missing.")
1032
+
1033
+ logger.info("ASGI Lifespan: Startup complete."); yield # Application runs here
1034
+
1035
  except Exception as startup_err:
1036
  logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
1037
  if ptb_app:
1038
  if ptb_app.running: await ptb_app.stop()
1039
  await ptb_app.shutdown()
1040
+ raise # Propagate error to stop Starlette
1041
  finally:
1042
+ # --- Shutdown ---
1043
  logger.info("ASGI Lifespan: Shutdown initiated...")
1044
  if ptb_app:
1045
+ if ptb_app.running: logger.info("Stopping PTB Application..."); await ptb_app.stop()
1046
+ logger.info("Shutting down PTB Application..."); await ptb_app.shutdown(); logger.info("PTB Application shut down.")
1047
+ # Attempt to clean up webhook on shutdown (optional, might fail if app is stopping forcefully)
1048
+ try:
1049
+ logger.info("Attempting to delete webhook on shutdown...")
1050
+ await ptb_app.bot.delete_webhook(drop_pending_updates=True)
1051
+ logger.info("Webhook deleted on shutdown.")
1052
+ except Exception as e:
1053
+ logger.warning(f"Could not delete webhook during shutdown: {e}")
1054
+ else: logger.info("PTB application was not fully initialized or failed during startup.")
1055
  logger.info("ASGI Lifespan: Shutdown complete.")
1056
 
1057
async def health_check(request: Request) -> PlainTextResponse:
    """Simple health check endpoint."""
    global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled

    # Determine the bot's current state, tolerating API errors.
    bot_status = "Not Initialized"
    if ptb_app and ptb_app.bot:
        try:
            if ptb_app.running:
                me = await ptb_app.bot.get_me()
                bot_status = f"Running (@{me.username})"
            else:
                bot_status = "Initialized but Not running"
        except Exception as e:
            bot_status = f"Error checking status: {e}"

    # Describe each configured backend.
    gemini_desc = GEMINI_MODEL if _gemini_primary_enabled else 'N/A (Disabled)'
    openrouter_desc = OPENROUTER_MODEL if _openrouter_fallback_enabled else 'N/A (Disabled)'
    apify_desc = APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'
    urltotext_desc = 'Enabled' if _urltotext_key_exists else 'Disabled'
    rapidapi_desc = 'Enabled' if _rapidapi_key_exists else 'Disabled'

    report = "\n".join([
        f"TG Bot Summariser - Status: {bot_status}",  # Changed 'Summarizer'
        f"Primary Model (Gemini): {gemini_desc}",
        f"Fallback Model (OpenRouter): {openrouter_desc}",
        f"YT Fallback (Apify Actor): {apify_desc}",
        f"Web Fallback 1 (urltotext): {urltotext_desc}",
        f"Web Fallbacks 2/3 (RapidAPI): {rapidapi_desc}",
    ])
    return PlainTextResponse(report)
1079
 
1080
async def telegram_webhook(request: Request) -> Response:
    """Handles incoming updates from Telegram."""
    global WEBHOOK_SECRET

    # Guard: the PTB application must exist and be running before dispatching.
    if not ptb_app:
        logger.error("Webhook received but PTB application not initialized.")
        return PlainTextResponse('Bot not initialized', status_code=503)
    if not ptb_app.running:
        logger.warning("Webhook received but PTB application not running.")
        return PlainTextResponse('Bot not running, cannot process update', status_code=503)

    # --- Security Check ---
    # Telegram echoes the secret token we registered; reject mismatches.
    if WEBHOOK_SECRET:
        provided_secret = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
        if provided_secret != WEBHOOK_SECRET:
            logger.warning(f"Webhook received with invalid secret token. Header: '{provided_secret}'")
            return Response(content="Invalid secret token", status_code=403)  # Forbidden

    # --- Process Update ---
    try:
        payload = await request.json()
        update = Update.de_json(data=payload, bot=ptb_app.bot)
        logger.debug(f"Processing update_id: {update.update_id} via webhook")
        await ptb_app.process_update(update)
        return Response(status_code=200)  # OK - Tell Telegram we received it
    except json.JSONDecodeError:
        logger.error("Webhook received invalid JSON.")
        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
    except Exception as e:
        logger.error(f"Error processing webhook update: {e}", exc_info=True)
        # Still return 200 OK to Telegram, otherwise it will keep retrying the
        # same failed update. The error is logged for debugging.
        return Response(status_code=200)
1108
+
1109
# --- Starlette App Definition ---
# Routes: health probe at "/" and the Telegram update receiver at "/webhook"
# (the path must match the URL registered with Telegram's set_webhook).
_routes = [
    Route("/", endpoint=health_check, methods=["GET"]),
    Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
]
app = Starlette(
    debug=False,  # Set to True for more verbose errors during development ONLY
    lifespan=lifespan,
    routes=_routes,
)
logger.info("Starlette ASGI application created with native routes.")
1119
 
1120
# --- Development Server (if run directly) ---
if __name__ == '__main__':
    import uvicorn
    logger.warning("Running in development mode using Uvicorn directly - NOT for production!")
    # Pull configuration from the environment where available, with sane defaults.
    level_name = os.environ.get("LOGGING_LEVEL", "info").lower()
    serve_port = int(os.environ.get('PORT', 8080))  # PORT is the common PaaS convention
    uvicorn.run(
        "__main__:app",
        host='0.0.0.0',     # listen on all available network interfaces
        port=serve_port,
        log_level=level_name,
        reload=True,        # auto-reload for development
    )