fmab777 committed on
Commit
2a2d394
·
verified ·
1 Parent(s): 1d82147

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +64 -219
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Revised: Increased Pool/Timeouts + Robust Callback Handling)
2
  import os
3
  import re
4
  import logging
@@ -28,7 +28,7 @@ from telegram.error import NetworkError, RetryAfter, TimedOut # Import TimedOut
28
  from telegram.request import HTTPXRequest # Import the request class
29
 
30
  # --- Other Libraries ---
31
- import httpx # <<<--- ADDED IMPORT for httpx.Limits
32
  from youtube_transcript_api import YouTubeTranscriptApi
33
  import requests
34
  from bs4 import BeautifulSoup
@@ -62,7 +62,6 @@ ptb_app: Application | None = None
62
  # --- Environment Variable Loading ---
63
  logger.info("Attempting to load secrets...")
64
  def get_secret(secret_name):
65
- # logger.debug(f"Attempting to read secret: {secret_name}") # Optional: Less verbose startup
66
  value = os.environ.get(secret_name)
67
  if value: logger.info(f"Secret '{secret_name}': Found (Value length: {len(value)})")
68
  else: logger.warning(f"Secret '{secret_name}': Not Found")
@@ -80,7 +79,7 @@ logger.info("Secret loading attempt finished.")
80
  # (Keep ALL your functions: is_youtube_url, extract_youtube_id,
81
  # get_transcript_via_supadata, get_transcript_via_apify,
82
  # get_youtube_transcript, get_website_content_via_requests,
83
- # get_website_content_via_urltotext_api, generate_summary)
84
 
85
  # Helper Functions
86
  def is_youtube_url(url):
@@ -112,7 +111,6 @@ async def get_transcript_via_supadata(video_id: str, api_key: str):
112
  params = {"videoId": video_id, "format": "text"}
113
  headers = {"X-API-Key": api_key}
114
  try:
115
- # Consider removing verify=False if possible, or manage certificates properly
116
  logger.warning("[Supadata] Making request with verify=False (Attempting to bypass SSL verification - Potential Security Risk)")
117
  response = await asyncio.to_thread(requests.get, api_endpoint, headers=headers, params=params, timeout=30, verify=False)
118
  logger.debug(f"[Supadata] Received status code {response.status_code} for {video_id}")
@@ -284,23 +282,21 @@ async def get_website_content_via_requests(url):
284
  if not url: logger.error("[Web Scraper - Requests/BS4] get_website_content_via_requests called with no URL"); return None
285
  logger.info(f"[Web Scraper - Requests/BS4] Attempting fetch: {url}")
286
  try:
287
- # Standard headers, avoid overly aggressive scraping patterns
288
  headers = {
289
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', # Updated UA
290
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
291
  'Accept-Language': 'en-US,en;q=0.9',
292
  'Connection': 'keep-alive',
293
- 'DNT': '1', # Do Not Track header
294
  'Upgrade-Insecure-Requests': '1'
295
  }
296
  response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=25, allow_redirects=True)
297
- response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
298
  logger.debug(f"[Web Scraper - Requests/BS4] Status {response.status_code} for {url}")
299
 
300
  content_type = response.headers.get('content-type', '').lower()
301
  if 'html' not in content_type:
302
  logger.warning(f"[Web Scraper - Requests/BS4] Non-HTML content type received: {content_type}. Attempting plain text extraction.")
303
- # Allow plain text only if explicitly text/plain
304
  if 'text/plain' in content_type and response.text:
305
  logger.info(f"[Web Scraper - Requests/BS4] Extracted plain text content. Length: {len(response.text.strip())}")
306
  return response.text.strip()
@@ -308,39 +304,25 @@ async def get_website_content_via_requests(url):
308
  return None
309
 
310
  soup = BeautifulSoup(response.text, 'html.parser')
311
-
312
- # Remove common non-content tags more aggressively
313
  tags_to_remove = ["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "label", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio", "picture", "source"]
314
- # Also remove elements often used for ads or menus by class/id
315
  selectors_to_remove = ['.ad', '#ad', '.ads', '#ads', '.advertisement', '#advertisement', '.banner', '#banner', '.menu', '#menu', '.navigation', '#navigation', '.sidebar', '#sidebar', '.social', '#social', '.share', '#share', '.related', '#related', '.comments', '#comments', '.cookie-consent', '#cookie-consent']
316
-
317
  for tag in soup(tags_to_remove): tag.decompose()
318
  for selector in selectors_to_remove:
319
  for element in soup.select(selector): element.decompose()
320
 
321
- # Try to find semantic main content areas first
322
- main_content = soup.find('main') or \
323
- soup.find('article') or \
324
- soup.find(id='content') or \
325
- soup.find(class_='content') or \
326
- soup.find(id='main-content') or \
327
- soup.find(class_='main-content') or \
328
- soup.find(role='main')
329
-
330
  target_element = main_content if main_content else soup.body
331
  if not target_element:
332
  logger.warning(f"[Web Scraper - Requests/BS4] Could not find a suitable target element (main, article, body) for {url}");
333
  return None
334
 
335
- # Extract text, attempting to preserve paragraphs better
336
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
337
- text = "\n\n".join(lines) # Join lines with double newline for paragraph separation
338
 
339
- MIN_TEXT_LENGTH = 100 # Increased minimum length
340
  if not text or len(text) < MIN_TEXT_LENGTH:
341
  logger.warning(f"[Web Scraper - Requests/BS4] Extracted text is too short (<{MIN_TEXT_LENGTH} chars) after cleaning for {url}. Length: {len(text)}. Content might be JS-rendered or blocked.")
342
- # Optional: Log the short text for debugging: logger.debug(f"Short text: {text[:500]}")
343
- return None # Treat very short text as failure
344
 
345
  logger.info(f"[Web Scraper - Requests/BS4] Successfully scraped and cleaned content from {url}. Final Length: {len(text)}")
346
  return text
@@ -358,18 +340,17 @@ async def get_website_content_via_urltotext_api(url: str, api_key: str):
358
  if not api_key: logger.error("[Web Scraper - URLToText API] API key is missing."); return None
359
  logger.info(f"[Web Scraper - URLToText API] Attempting fetch via API: {url}")
360
  api_endpoint = "https://urltotext.com/api/v1/urltotext/"
361
- # Ensure payload includes options beneficial for scraping modern sites
362
  payload = json.dumps({
363
  "url": url,
364
  "output_format": "text",
365
- "extract_main_content": True, # Try to get just the core article/content
366
- "render_javascript": True, # Crucial for JS-heavy sites
367
- "residential_proxy": False, # Set to True if facing blocks, requires appropriate plan
368
- "timeout_render": 20000, # Increase JS render timeout (in ms)
369
  })
370
  headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
371
  try:
372
- response = await asyncio.to_thread(requests.post, api_endpoint, headers=headers, data=payload, timeout=60) # Increased overall timeout
373
  logger.debug(f"[Web Scraper - URLToText API] Received status code {response.status_code} for {url}")
374
  if response.status_code == 200:
375
  try:
@@ -378,10 +359,10 @@ async def get_website_content_via_urltotext_api(url: str, api_key: str):
378
  content = content_data.get("content")
379
  credits = data.get("credits_used", "N/A")
380
  warning = content_data.get("warning")
381
- error_msg = content_data.get("error") # Check for specific error in response data
382
 
383
  if warning: logger.warning(f"[Web Scraper - URLToText API] API Warning for {url}: {warning}")
384
- if error_msg: logger.error(f"[Web Scraper - URLToText API] API Error reported for {url}: {error_msg}"); return None # Treat API error as failure
385
 
386
  if content and isinstance(content, str):
387
  logger.info(f"[Web Scraper - URLToText API] Successfully fetched content via API. Length: {len(content.strip())}. Credits Used: {credits}");
@@ -394,11 +375,11 @@ async def get_website_content_via_urltotext_api(url: str, api_key: str):
394
  elif response.status_code == 400: logger.error(f"[Web Scraper - URLToText API] Bad Request (400) to API. Check payload/URL. Response: {response.text[:200]}...")
395
  elif response.status_code == 401: logger.error(f"[Web Scraper - URLToText API] Unauthorized (401). Check API Key. Response: {response.text[:200]}...")
396
  elif response.status_code == 402: logger.error(f"[Web Scraper - URLToText API] Payment Required (402). Check API credits/plan. Response: {response.text[:200]}...")
397
- elif response.status_code == 422: logger.warning(f"[Web Scraper - URLToText API] Unprocessable URL / Fetch Error (422) reported by API for {url}. Response: {response.text[:200]}...") # Might mean the site blocked the API
398
  elif response.status_code == 429: logger.warning(f"[Web Scraper - URLToText API] Rate Limit Hit (429). Response: {response.text[:200]}...")
399
  elif response.status_code >= 500: logger.error(f"[Web Scraper - URLToText API] API Server Error ({response.status_code}). Response: {response.text[:200]}...")
400
  else: logger.error(f"[Web Scraper - URLToText API] Unexpected status code {response.status_code} from API. Response: {response.text[:200]}...")
401
- return None # Return None for all non-200 responses after logging
402
  except requests.exceptions.Timeout: logger.error(f"[Web Scraper - URLToText API] Timeout connecting to API for {url}"); return None
403
  except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error connecting to API: {e}"); return None
404
  except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error during API call: {e}", exc_info=True); return None
@@ -411,11 +392,8 @@ async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
411
  if not text or not text.strip(): logger.warning("generate_summary called with empty or whitespace-only text."); return "Error: No content was provided to summarize."
412
 
413
  openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
414
- # Consider using a non-free model if rate limits are hit or quality needed
415
  model_name = "deepseek/deepseek-chat:free"
416
- # model_name = "openai/gpt-3.5-turbo" # Example alternative
417
 
418
- # --- UPDATED PROMPTS ---
419
  if summary_type == "paragraph":
420
  system_message = (
421
  "You are an expert summarization AI. Your goal is to provide a concise, easy-to-understand summary of the provided text. "
@@ -449,12 +427,8 @@ async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
449
  else:
450
  logger.error(f"Invalid summary_type '{summary_type}' requested.")
451
  return f"Error: Invalid summary type ('{summary_type}') requested. Please choose 'paragraph' or 'points'."
452
- # --- END UPDATED PROMPTS ---
453
 
454
- # Practical limit for API context window / cost control
455
- # Deepseek context might be larger, but set a reasonable app limit
456
- MAX_INPUT_TOKENS_ESTIMATE = 28000 # Rough estimate for deepseek-chat's context limit (aim lower than max)
457
- # Simple character length heuristic (adjust based on typical content)
458
  AVG_CHARS_PER_TOKEN = 4
459
  MAX_INPUT_LENGTH = MAX_INPUT_TOKENS_ESTIMATE * AVG_CHARS_PER_TOKEN
460
 
@@ -463,33 +437,29 @@ async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
463
  truncation_marker = "\n\n[... Text truncated due to length ...]"
464
  text = text[:MAX_INPUT_LENGTH - len(truncation_marker)] + truncation_marker
465
 
466
- # Construct the messages payload for the API
467
  messages = [
468
  {"role": "system", "content": system_message},
469
  {"role": "user", "content": f"{user_prompt_instruction}\n\n--- TEXT TO SUMMARIZE ---\n\n{text}\n\n--- END OF TEXT ---"}
470
  ]
471
 
472
- # Referer and Title for OpenRouter identification
473
- space_host = os.environ.get("SPACE_HOST", "huggingface.co/spaces/YOUR_SPACE_NAME") # Replace default if needed
474
  referer_url = f"https://{space_host}" if space_host and not space_host.startswith("http") else space_host or "https://huggingface.co"
475
  headers = {
476
  "Authorization": f"Bearer {api_key}",
477
  "Content-Type": "application/json",
478
  "HTTP-Referer": referer_url,
479
- "X-Title": "Telegram URL Summarizer Bot" # Or your bot's name
480
  }
481
  payload = json.dumps({"model": model_name, "messages": messages})
482
 
483
  try:
484
  logger.debug(f"Sending request to OpenRouter (Model: {model_name}). Prompt length approx: {len(text)} chars.")
485
- # Increased timeout for potentially long AI generation
486
  response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=120)
487
  logger.debug(f"Received status {response.status_code} from OpenRouter.")
488
 
489
  if response.status_code == 200:
490
  try:
491
  data = response.json()
492
- # Check for response structure variations
493
  choice = data.get("choices", [{}])[0]
494
  message = choice.get("message", {})
495
  summary = message.get("content")
@@ -498,8 +468,7 @@ async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
498
  if summary and isinstance(summary, str) and summary.strip():
499
  summary = summary.strip()
500
  logger.info(f"Successfully generated summary. Finish Reason: {finish_reason}. Length: {len(summary)}")
501
- # Optional: Add post-processing checks (e.g., length for paragraph)
502
- if summary_type == "paragraph" and len(summary.split()) > 95: # Allow slight overrun from 85 words
503
  logger.warning(f"Generated paragraph summary slightly longer than target word count ({len(summary.split())} words).")
504
  return summary
505
  else:
@@ -513,16 +482,14 @@ async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
513
  logger.error(f"Unexpected error processing OpenRouter success response: {e}", exc_info=True)
514
  return "Sorry, an unexpected error occurred while processing the AI response."
515
 
516
- # Handle specific HTTP error codes from OpenRouter
517
  elif response.status_code == 401: logger.error("OpenRouter API key is invalid (Unauthorized - 401)."); return "Error: AI service authentication failed. Please check the configuration."
518
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402). Check credits/limits."); return "Sorry, there's an issue with the AI service account limits or payment."
519
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Hit (429)."); return "Sorry, the AI model is currently busy due to high demand. Please try again in a moment."
520
  elif response.status_code == 400: logger.error(f"OpenRouter Bad Request (400). Likely prompt issue. Response: {response.text[:500]}..."); return "Sorry, the request to the AI service was invalid (possibly due to the content or prompt)."
521
  elif response.status_code >= 500: logger.error(f"OpenRouter Server Error ({response.status_code}). Response: {response.text[:500]}..."); return "Sorry, the AI service is experiencing internal issues. Please try again later."
522
  else:
523
- # Handle other unexpected errors
524
  logger.error(f"Unexpected HTTP status {response.status_code} from OpenRouter. Response: {response.text[:500]}...")
525
- try: # Try to extract an error message from the response body
526
  error_data = response.json()
527
  error_msg = error_data.get("error", {}).get("message", response.text[:100])
528
  return f"Sorry, the AI service returned an error ({response.status_code}): {error_msg}"
@@ -539,9 +506,8 @@ async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
539
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
540
  """Handles the /start command."""
541
  user = update.effective_user
542
- if not user: return # Should not happen with a command
543
  logger.info(f"User {user.id} ({user.username or 'NoUsername'}) initiated /start.")
544
- # Use mention_html for linking username if available, otherwise just first name
545
  mention = user.mention_html() if user.username else user.first_name
546
  start_message = (
547
  f"👋 Hello {mention}!\n\n"
@@ -568,7 +534,6 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> No
568
  "- **Length:** Very long articles or videos might be truncated before summarization to fit within processing limits.\n\n"
569
  "Just send a link to get started!"
570
  )
571
- # Use MarkdownV2 for better formatting control if needed, but MARKDOWN is simpler
572
  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
573
 
574
  async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -576,18 +541,14 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
576
  if not update.message or not update.message.text: return
577
  message_text = update.message.text.strip()
578
  user = update.effective_user
579
- if not user: return # Should not happen with a message
580
 
581
- # More robust URL regex (handles various protocols, domains, paths, queries)
582
- # Still simple, not aiming for perfect RFC 3986 validation
583
  url_pattern = r'https?://(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}(?:/[^\s]*)?'
584
  match = re.search(url_pattern, message_text)
585
 
586
  if match:
587
  url = match.group(0)
588
  logger.info(f"User {user.id} sent potential URL: {url}")
589
-
590
- # Store URL in user_data, associated with the user ID
591
  context.user_data['url_to_summarize'] = url
592
  logger.debug(f"Stored URL '{url}' in user_data for user {user.id}")
593
 
@@ -598,18 +559,13 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
598
  ]
599
  ]
600
  reply_markup = InlineKeyboardMarkup(keyboard)
601
-
602
- # Send message asking for summary type
603
  await update.message.reply_text(
604
  f"✅ Link received:\n`{url}`\n\nChoose your desired summary format:",
605
  reply_markup=reply_markup,
606
  parse_mode=ParseMode.MARKDOWN,
607
- link_preview_options={'is_disabled': True} # Disable link preview for this message
608
  )
609
  else:
610
- # If it doesn't look like a URL, maybe provide guidance?
611
- # logger.debug(f"Ignoring non-URL message from {user.id}: {message_text[:100]}")
612
- # Optional: Reply if it's not a command and not a URL
613
  if not message_text.startswith('/'):
614
  await update.message.reply_text("Please send me a valid URL (starting with http:// or https://) to summarize.")
615
 
@@ -619,53 +575,42 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
619
  query = update.callback_query
620
  if not query or not query.from_user:
621
  logger.warning("Callback query or user missing in update.")
622
- return # Can't proceed without query/user
623
  user = query.from_user
624
 
625
- # --- Answer Callback Query Immediately ---
626
  try:
627
- await query.answer() # Acknowledge the button press
628
  logger.debug(f"Answered callback query {query.id} for user {user.id}")
629
  except TimedOut:
630
- # Log timeout but proceed; the button loading indicator might just hang for the user
631
  logger.warning(f"Timeout answering callback query {query.id} for user {user.id}. Processing continues.")
632
  except Exception as e:
633
- # Log other errors but proceed cautiously. The button might remain "loading".
634
  logger.error(f"Error answering callback query {query.id} for user {user.id}: {e!r}", exc_info=True)
635
 
636
- summary_type = query.data # 'paragraph' or 'points'
637
- # Retrieve URL stored earlier for this user
638
  url = context.user_data.get('url_to_summarize')
639
  logger.info(f"User {user.id} chose summary type '{summary_type}'. Checking for stored URL.")
640
 
641
  if not url:
642
  logger.warning(f"User {user.id} pressed button '{summary_type}', but NO URL found in user_data context.")
643
  try:
644
- # Inform user context was lost (e.g., bot restarted, long delay)
645
  await query.edit_message_text(text="⚠️ Oops! I lost the context for that link. Please send the link again.")
646
  except TimedOut:
647
  logger.error(f"Timeout trying to edit message to inform user {user.id} about lost context.")
648
  except Exception as edit_err:
649
- # Log error if editing fails (message might already be gone, or other Telegram issue)
650
  logger.error(f"Failed to edit message for lost context for user {user.id}: {edit_err}")
651
- return # Stop processing if URL is missing
652
 
653
- # --- URL Found - Proceed with Processing ---
654
  logger.info(f"Processing URL '{url}' for user {user.id} with type '{summary_type}'.")
655
- # Clear the URL from context now that we're processing it
656
  context.user_data.pop('url_to_summarize', None)
657
  logger.debug(f"Cleared URL from user_data for user {user.id}")
658
 
659
- # Fetch current API keys (allows for potential runtime changes, though unlikely here)
660
  current_openrouter_key = os.environ.get('OPENROUTER_API_KEY')
661
  current_urltotext_key = os.environ.get('URLTOTEXT_API_KEY')
662
  current_supadata_key = os.environ.get('SUPADATA_API_KEY')
663
  current_apify_token = os.environ.get('APIFY_API_TOKEN')
664
- # Simple check log
665
  keys_present = f"OR={'Y' if current_openrouter_key else 'N'}, UTT={'Y' if current_urltotext_key else 'N'}, SD={'Y' if current_supadata_key else 'N'}, AP={'Y' if current_apify_token else 'N'}"
666
  logger.debug(f"API Key check for user {user.id} request: {keys_present}")
667
 
668
- # Critical dependency check: AI key
669
  if not current_openrouter_key:
670
  logger.error(f"CRITICAL: OpenRouter API key is missing. Cannot generate summary for user {user.id}.")
671
  try:
@@ -676,47 +621,39 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
676
  logger.error(f"Failed to edit message for missing AI key for user {user.id}: {edit_err}")
677
  return
678
 
679
- # --- Inform User Processing Has Started ---
680
  processing_message_text = f"⏳ Working on your '{summary_type}' summary for the link...\n_(This might take up to a minute depending on the content)_"
681
- message_to_edit = query.message # The message with the buttons
682
- status_message_sent = None # Will hold msg ID if we send a new status message
683
 
684
  try:
685
  if message_to_edit:
686
  await query.edit_message_text(text=processing_message_text)
687
  logger.debug(f"Edited original message {message_to_edit.message_id} to show 'Working...' status for query {query.id}")
688
  else:
689
- # This case should be rare if query.message exists, but handle defensively
690
  logger.warning(f"Original message (query.message) not found for query {query.id}. Cannot edit, will send new status message.")
691
- raise ValueError("Original message object missing") # Force fallback to sending new message
692
  except (TimedOut, Exception) as e:
693
- # If editing fails (e.g., message too old, deleted, rate limit), try sending a new message
694
  logger.warning(f"Could not edit original message {message_to_edit.message_id if message_to_edit else 'N/A'} for query {query.id}: {e!r}. Attempting to send a new status message.")
695
- message_to_edit = None # Ensure we don't try to delete this later if editing failed
696
  try:
697
  status_message_sent = await context.bot.send_message(chat_id=user.id, text=processing_message_text)
698
  logger.debug(f"Sent new status message {status_message_sent.message_id} to user {user.id}.")
699
  except TimedOut:
700
  logger.error(f"Timeout sending NEW 'Working...' status message to user {user.id}. Processing continues without feedback.")
701
- # User won't know bot is working - proceed anyway, hope for the best.
702
  except Exception as send_err:
703
  logger.error(f"Failed sending NEW 'Working...' status message to user {user.id}: {send_err}. Processing continues without feedback.")
704
- # As above.
705
 
706
- # --- Main Content Fetching and Summarization ---
707
  content = None
708
- user_feedback_message = None # Holds error/status messages for the user
709
- success = False # Tracks if we successfully sent a summary
710
 
711
  try:
712
- # Send 'typing' action to indicate activity
713
  try:
714
  logger.debug(f"Sending 'typing' chat action to chat {user.id}")
715
  await context.bot.send_chat_action(chat_id=user.id, action='typing')
716
  except TimedOut: logger.warning(f"Timeout sending 'typing' action for user {user.id}.")
717
  except Exception as ca_err: logger.warning(f"Failed sending 'typing' action for user {user.id}: {ca_err}")
718
 
719
- # --- Determine Content Type and Fetch ---
720
  is_yt = is_youtube_url(url)
721
  logger.debug(f"URL ({url}) is YouTube: {is_yt} (User: {user.id})")
722
 
@@ -734,18 +671,15 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
734
  logger.warning(f"Failed to extract YouTube video ID from URL: {url} (User: {user.id})")
735
  user_feedback_message = "⚠️ Sorry, I couldn't identify a valid YouTube video ID in the link you provided."
736
  else:
737
- # --- Website Scraping ---
738
  logger.info(f"Attempting website scrape (Requests/BS4) for URL: {url} (User: {user.id})")
739
  content = await get_website_content_via_requests(url)
740
  if content:
741
  logger.info(f"Website scrape successful (Requests/BS4). Length: {len(content)} (User: {user.id})")
742
- # Content found, no need for feedback message yet
743
  else:
744
  logger.warning(f"Primary website scrape failed for {url} (User: {user.id}). Trying fallback API.")
745
  if current_urltotext_key:
746
- # Send typing again if first scrape failed and we try another method
747
  try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before fallback scrape.")
748
- except: pass # Ignore if fails
749
 
750
  logger.info(f"Attempting website scrape via URLToText API for: {url} (User: {user.id})")
751
  content = await get_website_content_via_urltotext_api(url, current_urltotext_key)
@@ -755,51 +689,44 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
755
  logger.warning(f"Fallback website scrape (URLToText API) also failed for {url} (User: {user.id}).")
756
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website using available methods. It might be protected or structured in a way I can't parse."
757
  else:
758
- # Fallback key missing
759
  logger.warning(f"Primary scrape failed and URLToText API key not configured. Cannot fallback for {url} (User: {user.id}).")
760
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website, and the fallback service isn't configured."
761
 
762
- # --- Generate Summary if Content Was Fetched ---
763
  if content:
764
  logger.info(f"Content fetched (Length: {len(content)}). Generating '{summary_type}' summary for user {user.id}.")
765
- # Send typing before potentially long AI call
766
  try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before AI summary generation.")
767
  except: pass
768
 
769
  summary = await generate_summary(content, summary_type, current_openrouter_key)
770
 
771
- # Check if summary generation returned an error message
772
  if summary.startswith("Error:") or summary.startswith("Sorry,"):
773
  logger.warning(f"AI summary generation failed for user {user.id}. Reason: {summary}")
774
- user_feedback_message = f"⚠️ {summary}" # Use the error message from generate_summary
775
  else:
776
- # --- Summary Success - Send to User ---
777
  logger.info(f"Summary generated successfully for user {user.id}. Length: {len(summary)}. Sending result.")
778
  try:
779
  await context.bot.send_message(
780
  chat_id=user.id,
781
  text=summary,
782
- parse_mode=ParseMode.MARKDOWN, # Assuming AI generates markdown for points
783
  link_preview_options={'is_disabled': True}
784
  )
785
  success = True
786
- user_feedback_message = None # Clear any previous fetching error message
787
  logger.info(f"Successfully sent summary to user {user.id}.")
788
  except TimedOut:
789
  logger.error(f"Timeout sending final summary message to user {user.id}.")
790
  user_feedback_message = "⚠️ Sorry, there was a timeout while trying to send you the final summary."
791
- success = False # Mark as failed if sending timed out
792
  except Exception as send_final_err:
793
  logger.error(f"Failed sending final summary to user {user.id}: {send_final_err}", exc_info=True)
794
  user_feedback_message = "⚠️ Sorry, an unexpected error occurred while sending the final summary."
795
- success = False # Mark as failed
796
 
797
  elif not user_feedback_message:
798
- # If content is None, but no specific error message was set above, set a generic one.
799
  logger.warning(f"Content retrieval resulted in None, but no specific user feedback message was set. URL: {url} (User: {user.id})")
800
  user_feedback_message = "⚠️ Sorry, I couldn't retrieve any usable content from the link provided."
801
 
802
- # --- Send Final Feedback Message if Processing Failed ---
803
  if user_feedback_message and not success:
804
  logger.warning(f"Processing failed or summary sending failed for user {user.id}. Sending feedback: {user_feedback_message}")
805
  try:
@@ -810,114 +737,79 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
810
  logger.error(f"Failed sending final FAILURE feedback message to user {user.id}: {send_feedback_err}")
811
 
812
  except Exception as e:
813
- # Catch-all for unexpected errors during the main processing block
814
  logger.error(f"Unexpected critical error during callback processing for user {user.id}, URL {url}: {e}", exc_info=True)
815
  try:
816
- # Send a generic error message to the user
817
  await context.bot.send_message(chat_id=user.id, text="❌ Oops! An unexpected internal error occurred while processing your request. The issue has been logged.")
818
  except TimedOut:
819
  logger.error(f"Timeout sending CRITICAL internal error feedback message to user {user.id}.")
820
  except Exception as final_err:
821
- # If even sending the error message fails, log it.
822
  logger.error(f"Failed sending CRITICAL internal error feedback message to user {user.id}: {final_err}")
823
- # Ensure success is False if we hit this block
824
  success = False
825
 
826
  finally:
827
- # --- Clean up Status Message(s) ---
828
  logger.debug(f"Cleaning up status message(s) for user {user.id}, query {query.id}. Success={success}")
829
  try:
830
  if status_message_sent:
831
- # If we sent a separate "Working..." message, delete it regardless of success/failure
832
- # as the final result or error message has been (or attempted to be) sent.
833
  await context.bot.delete_message(chat_id=user.id, message_id=status_message_sent.message_id)
834
  logger.debug(f"Deleted separate status message {status_message_sent.message_id} for user {user.id}.")
835
  elif message_to_edit:
836
- # If we edited the original message with the buttons...
837
  if success:
838
- # If processing succeeded, delete the "Working..." message.
839
  await query.delete_message()
840
  logger.debug(f"Processing succeeded. Deleted original (edited) message {message_to_edit.message_id} for query {query.id}.")
841
  else:
842
- # If processing failed, *don't* delete the message.
843
- # It either still shows "Working..." (if sending final error failed)
844
- # or it might show an error message if edit_message_text was used for that.
845
- # Let's try to edit it one last time to show a generic failure if no specific feedback was sent.
846
- # This is complex, maybe just leave it as is for simplicity.
847
  logger.debug(f"Processing failed. Leaving edited message {message_to_edit.message_id} in place for query {query.id}.")
848
- # Optional: Try one last edit to show failure if needed, but might be overkill
849
- # if not user_feedback_message: # Only if no other error was sent
850
- # try: await query.edit_message_text("❌ Processing failed.")
851
- # except: pass # Ignore errors here
852
-
853
- # If message_to_edit was None (original edit failed) and status_message_sent was None (sending new status failed), there's nothing to delete here.
854
 
855
  except TimedOut:
856
  logger.warning(f"Timeout attempting to delete status/button message for user {user.id}, query {query.id}.")
857
  except Exception as del_e:
858
- # Log deletion errors as warnings, not critical if cleanup fails.
859
- # Common error: message already deleted or trying to delete too late.
860
  logger.warning(f"Could not delete status/button message for user {user.id}, query {query.id}: {del_e!r}")
861
 
862
- # Log the completion of the callback handling
863
  logger.info(f"Finished handling callback query {query.id} for user {user.id}. Overall Success: {success}")
864
 
865
 
866
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
867
  """Log Errors caused by Updates."""
868
  logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
869
- # Add specific error type handling if needed (e.g., NetworkError, TimedOut)
870
  if isinstance(context.error, TimedOut):
871
  logger.warning("A timeout error occurred in PTB communication.")
872
  elif isinstance(context.error, NetworkError):
873
  logger.warning(f"A network error occurred: {context.error}")
874
- # Consider notifying admin or user for specific critical errors if appropriate
875
 
876
- # --- Bot Setup Function (Modified: Increased Pool/Timeouts) ---
877
  async def setup_bot_config() -> Application:
878
- """Configures the PTB Application with custom HTTPX settings."""
879
  logger.info("Configuring Telegram Application...")
880
  if not TELEGRAM_TOKEN:
881
  logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
882
  raise ValueError("TELEGRAM_TOKEN environment variable not set.")
883
 
884
- # --- Configure HTTPX client settings ---
885
  connect_timeout = 10.0 # Slightly higher connect timeout
886
- # --- INCREASED TIMEOUTS AND POOL SIZE ---
887
  read_timeout = 30.0 # Increased timeout for reading response
888
  write_timeout = 30.0 # Increased timeout for sending request
889
  pool_timeout = 30.0 # Increased timeout for getting connection from pool
890
- connection_pool_size = 50 # Significantly increased pool size
 
891
 
892
- logger.info(f"Creating PTB HTTPXRequest with settings: "
893
  f"connect_timeout={connect_timeout}, read_timeout={read_timeout}, "
894
- f"write_timeout={write_timeout}, pool_timeout={pool_timeout}, "
895
- f"pool_size={connection_pool_size}")
896
-
897
- # Create httpx.Limits object
898
- custom_limits = httpx.Limits(
899
- max_connections=connection_pool_size,
900
- max_keepalive_connections=connection_pool_size # Keepalive same as max
901
- # keepalive_expiry=60.0 # Optional: Keep idle connections open longer (seconds)
902
- )
903
 
904
- # Create a custom request object with these settings
905
  custom_request = HTTPXRequest(
906
  connect_timeout=connect_timeout,
907
  read_timeout=read_timeout,
908
  write_timeout=write_timeout,
909
  pool_timeout=pool_timeout,
910
- limits=custom_limits, # Use the Limits object here
911
- http_version="1.1" # HTTP/1.1 is usually fine, HTTP/2 might be slightly faster if supported end-to-end
912
  )
913
 
914
  # Use Application.builder() and pass the custom request object
915
  application_builder = Application.builder().token(TELEGRAM_TOKEN)
916
  application_builder.request(custom_request)
917
- # Also apply to get_updates if you were using polling (webhook doesn't use this heavily)
918
- # application_builder.get_updates_request(custom_request)
919
- # Apply connection pool settings globally if needed (less common now with direct request object)
920
- # application_builder.pool_timeout(pool_timeout) # This might be redundant if set in HTTPXRequest
921
 
922
  # Build the application instance
923
  application = application_builder.build()
@@ -925,11 +817,8 @@ async def setup_bot_config() -> Application:
925
  # --- Register Handlers ---
926
  application.add_handler(CommandHandler("start", start))
927
  application.add_handler(CommandHandler("help", help_command))
928
- # Handles non-command text messages that might contain a URL
929
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
930
- # Handles the button clicks ('paragraph' or 'points')
931
  application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
932
- # Global error handler
933
  application.add_error_handler(error_handler)
934
 
935
  logger.info("Telegram application handlers configured.")
@@ -941,51 +830,38 @@ async def lifespan(app: Starlette):
941
  """Handles PTB startup and shutdown during ASGI lifespan."""
942
  global ptb_app
943
  logger.info("ASGI Lifespan: Startup sequence initiated...")
944
- # loop = asyncio.get_running_loop() # Not usually needed directly
945
 
946
  try:
947
- # --- Setup and Initialize PTB Application ---
948
  ptb_app = await setup_bot_config()
949
  logger.info("PTB Application object configured. Initializing...")
950
- await ptb_app.initialize() # Initialize application components (e.g., bot instance)
951
  logger.info("PTB Application initialized. Starting background tasks (e.g., job queue)...")
952
- # Start PTB's internal tasks but not polling (we use webhook)
953
  await ptb_app.start()
954
- if ptb_app.updater: ptb_app.updater.stop() # Ensure polling is stopped if accidentally started
955
  bot_instance = ptb_app.bot
956
  bot_info = await bot_instance.get_me()
957
  logger.info(f"PTB Application started successfully. Bot ID: {bot_info.id}, Username: @{bot_info.username}")
958
 
959
- # --- Set Webhook ---
960
- # Ensure SPACE_HOST is correctly set in Hugging Face Space secrets
961
  WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
962
  if WEBHOOK_URL_BASE:
963
- # Ensure it's a proper HTTPS URL
964
  if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
965
- webhook_path = "/webhook" # Must match the route defined later
966
  full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
967
 
968
  logger.info(f"Attempting to set Telegram webhook to: {full_webhook_url}")
969
- # Short delay can sometimes help prevent race conditions on startup
970
  await asyncio.sleep(2.0)
971
  try:
972
- # Set the webhook, specifying allowed updates can reduce load
973
  await bot_instance.set_webhook(
974
  url=full_webhook_url,
975
- allowed_updates=Update.ALL_TYPES, # Or specify like [Update.MESSAGE, Update.CALLBACK_QUERY]
976
- # secret_token="YOUR_SECRET_TOKEN" # Recommended for security if possible
977
- # drop_pending_updates=True # Optional: Ignore updates sent while bot was down
978
  )
979
- # Verify webhook setup
980
  webhook_info = await bot_instance.get_webhook_info()
981
  if webhook_info.url == full_webhook_url:
982
  logger.info(f"Telegram webhook set successfully! Current info: {webhook_info}")
983
  else:
984
  logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got: {webhook_info.url}. Info: {webhook_info}")
985
  except RetryAfter as e:
986
- # This can happen if multiple workers try to set the webhook simultaneously
987
  logger.warning(f"Webhook setting throttled by Telegram (RetryAfter: {e.retry_after}s). Another instance likely succeeded or try again later.")
988
- # Optionally check info again after delay
989
  await asyncio.sleep(e.retry_after or 2)
990
  webhook_info = await bot_instance.get_webhook_info()
991
  logger.info(f"Webhook info after RetryAfter delay: {webhook_info}")
@@ -999,29 +875,24 @@ async def lifespan(app: Starlette):
999
 
1000
  except Exception as startup_err:
1001
  logger.critical(f"CRITICAL ERROR during ASGI application startup: {startup_err}", exc_info=True)
1002
- # Re-raise the exception to potentially stop the ASGI server from starting improperly
1003
  raise
1004
  finally:
1005
- # --- Shutdown Sequence ---
1006
  logger.info("ASGI Lifespan: Shutdown sequence initiated...")
1007
  if ptb_app:
1008
  bot_username = ptb_app.bot.username if ptb_app.bot else "N/A"
1009
  logger.info(f"PTB App instance found for @{bot_username}. Checking if running...")
1010
- # Check internal state if available (e.g., ptb_app.running might exist in future versions)
1011
- # Using _running is internal, but often the only way
1012
  is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
1013
  if is_running:
1014
  try:
1015
  logger.info("Stopping PTB Application's background tasks...")
1016
- await ptb_app.stop() # Stop internal tasks like JobQueue
1017
  logger.info("Shutting down PTB Application connections and resources...")
1018
- await ptb_app.shutdown() # Clean up resources (e.g., close HTTPX client)
1019
  logger.info("PTB Application shut down gracefully.")
1020
  except Exception as shutdown_err:
1021
  logger.error(f"Error during PTB Application shutdown: {shutdown_err}", exc_info=True)
1022
  else:
1023
  logger.warning("PTB Application instance exists but was not marked as running at shutdown.")
1024
- # Attempt shutdown anyway just in case resources need cleaning
1025
  try: await ptb_app.shutdown()
1026
  except Exception: logger.error("Error during shutdown of non-running PTB app.", exc_info=True)
1027
  else:
@@ -1030,8 +901,6 @@ async def lifespan(app: Starlette):
1030
 
1031
 
1032
  # --- Flask App Setup (for Webhook Route) ---
1033
- # We use Flask just for its familiarity in defining the route,
1034
- # but it runs within Starlette's ASGI context via WSGIMiddleware.
1035
  flask_core_app = Flask(__name__)
1036
  logger.info("Core Flask app instance created (used by Starlette for routing).")
1037
 
@@ -1042,7 +911,6 @@ def index():
1042
  logger.debug("Health check endpoint '/' accessed.")
1043
  bot_status = "Unknown / Not Initialized"
1044
  if ptb_app and ptb_app.bot:
1045
- # Check internal state again (might have changed)
1046
  is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
1047
  bot_status = f"Running (@{ptb_app.bot.username})" if is_running else f"Initialized/Stopped (@{ptb_app.bot.username})"
1048
  return f"Telegram Bot Summarizer - Status: {bot_status} - Listening via Starlette/Uvicorn."
@@ -1050,60 +918,43 @@ def index():
1050
  @flask_core_app.route('/webhook', methods=['POST'])
1051
  async def webhook() -> Response:
1052
  """Webhook endpoint called by Telegram."""
1053
- global ptb_app # Ensure we're using the global instance initialized by lifespan
1054
 
1055
  if not ptb_app:
1056
  logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None. Lifespan likely failed.")
1057
- # Return 503 Service Unavailable
1058
  return Response('Bot service is not configured or failed during startup.', status=503)
1059
 
1060
- # Check internal state (safer than assuming ptb_app implies running)
1061
  is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
1062
  if not is_running:
1063
  logger.error("Webhook triggered, but PTB Application is not currently running.")
1064
- # Return 503 Service Unavailable
1065
  return Response('Bot service is initialized but not actively running.', status=503)
1066
 
1067
- # Proceed with processing the update
1068
  logger.debug("Webhook endpoint received POST request from Telegram.")
1069
  try:
1070
- # Use Flask's request object to get JSON data
1071
  update_data = await request.get_json()
1072
  if not update_data:
1073
  logger.warning("Received empty or non-JSON data on webhook.")
1074
  return Response('Bad Request: Expected JSON payload.', status=400)
1075
 
1076
- # Deserialize JSON into a Telegram Update object
1077
  update = Update.de_json(update_data, ptb_app.bot)
1078
  logger.debug(f"Processing update_id: {update.update_id} via webhook route.")
1079
-
1080
- # Process the update using PTB's internal mechanisms
1081
- # This will dispatch it to the correct handler (CommandHandler, MessageHandler, etc.)
1082
  await ptb_app.process_update(update)
1083
-
1084
  logger.debug(f"Finished processing update_id: {update.update_id}")
1085
- # Return 200 OK to Telegram to acknowledge receipt
1086
  return Response('ok', status=200)
1087
 
1088
  except json.JSONDecodeError:
1089
  logger.error("Failed to decode JSON from Telegram webhook request.", exc_info=True)
1090
  return Response('Bad Request: Invalid JSON format.', status=400)
1091
  except Exception as e:
1092
- # Catch potential errors during Update.de_json or ptb_app.process_update
1093
  logger.error(f"Error processing update in webhook handler: {e}", exc_info=True)
1094
- # Return 500 Internal Server Error to Telegram
1095
- # Telegram will likely retry sending the update later
1096
  return Response('Internal Server Error processing update.', status=500)
1097
 
1098
 
1099
  # --- Create Starlette ASGI Application ---
1100
- # This is the main application object that Uvicorn/Gunicorn will run.
1101
  app = Starlette(
1102
- debug=False, # Set debug based on environment if needed, but generally False in prod
1103
- lifespan=lifespan, # Hook into the lifespan context manager for startup/shutdown
1104
  routes=[
1105
- # Mount the Flask app under the root path. Starlette handles requests
1106
- # and forwards relevant ones ('/') and ('/webhook') to the Flask app.
1107
  Mount("/", app=WSGIMiddleware(flask_core_app))
1108
  ]
1109
  )
@@ -1111,9 +962,6 @@ logger.info("Starlette ASGI application created, configured with lifespan and Fl
1111
 
1112
 
1113
  # --- Development Server Execution Block ---
1114
- # This block is ONLY for running the Flask app directly for basic testing
1115
- # WITHOUT the proper ASGI lifespan management (PTB won't start correctly here).
1116
- # DO NOT use this for deployment. Use `gunicorn main:app` or `uvicorn main:app`.
1117
  if __name__ == '__main__':
1118
  logger.warning("=" * 50)
1119
  logger.warning(" RUNNING SCRIPT DIRECTLY (using __main__) ".center(50, "="))
@@ -1129,9 +977,6 @@ if __name__ == '__main__':
1129
  if not TELEGRAM_TOKEN:
1130
  logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable missing. Aborting direct Flask start.")
1131
  else:
1132
- # Get port from environment or default to 8080 for local dev
1133
  local_port = int(os.environ.get('PORT', 8080))
1134
  logger.info(f"Starting Flask development server on http://0.0.0.0:{local_port}")
1135
- # Run the Flask app directly (no Starlette, no lifespan, no PTB)
1136
- # use_reloader=False is important if debugging PTB setup elsewhere
1137
  flask_core_app.run(host='0.0.0.0', port=local_port, debug=True, use_reloader=False)
 
1
+ # main.py (Revised: Corrected HTTPXRequest init for PTB v20 + Robust Callback Handling)
2
  import os
3
  import re
4
  import logging
 
28
  from telegram.request import HTTPXRequest # Import the request class
29
 
30
  # --- Other Libraries ---
31
+ import httpx # Keep import, might be useful elsewhere or if upgrading PTB later
32
  from youtube_transcript_api import YouTubeTranscriptApi
33
  import requests
34
  from bs4 import BeautifulSoup
 
62
  # --- Environment Variable Loading ---
63
  logger.info("Attempting to load secrets...")
64
  def get_secret(secret_name):
 
65
  value = os.environ.get(secret_name)
66
  if value: logger.info(f"Secret '{secret_name}': Found (Value length: {len(value)})")
67
  else: logger.warning(f"Secret '{secret_name}': Not Found")
 
79
  # (Keep ALL your functions: is_youtube_url, extract_youtube_id,
80
  # get_transcript_via_supadata, get_transcript_via_apify,
81
  # get_youtube_transcript, get_website_content_via_requests,
82
+ # get_website_content_via_urltotext_api, generate_summary - unchanged from previous version)
83
 
84
  # Helper Functions
85
  def is_youtube_url(url):
 
111
  params = {"videoId": video_id, "format": "text"}
112
  headers = {"X-API-Key": api_key}
113
  try:
 
114
  logger.warning("[Supadata] Making request with verify=False (Attempting to bypass SSL verification - Potential Security Risk)")
115
  response = await asyncio.to_thread(requests.get, api_endpoint, headers=headers, params=params, timeout=30, verify=False)
116
  logger.debug(f"[Supadata] Received status code {response.status_code} for {video_id}")
 
282
  if not url: logger.error("[Web Scraper - Requests/BS4] get_website_content_via_requests called with no URL"); return None
283
  logger.info(f"[Web Scraper - Requests/BS4] Attempting fetch: {url}")
284
  try:
 
285
  headers = {
286
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
287
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
288
  'Accept-Language': 'en-US,en;q=0.9',
289
  'Connection': 'keep-alive',
290
+ 'DNT': '1',
291
  'Upgrade-Insecure-Requests': '1'
292
  }
293
  response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=25, allow_redirects=True)
294
+ response.raise_for_status()
295
  logger.debug(f"[Web Scraper - Requests/BS4] Status {response.status_code} for {url}")
296
 
297
  content_type = response.headers.get('content-type', '').lower()
298
  if 'html' not in content_type:
299
  logger.warning(f"[Web Scraper - Requests/BS4] Non-HTML content type received: {content_type}. Attempting plain text extraction.")
 
300
  if 'text/plain' in content_type and response.text:
301
  logger.info(f"[Web Scraper - Requests/BS4] Extracted plain text content. Length: {len(response.text.strip())}")
302
  return response.text.strip()
 
304
  return None
305
 
306
  soup = BeautifulSoup(response.text, 'html.parser')
 
 
307
  tags_to_remove = ["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "label", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio", "picture", "source"]
 
308
  selectors_to_remove = ['.ad', '#ad', '.ads', '#ads', '.advertisement', '#advertisement', '.banner', '#banner', '.menu', '#menu', '.navigation', '#navigation', '.sidebar', '#sidebar', '.social', '#social', '.share', '#share', '.related', '#related', '.comments', '#comments', '.cookie-consent', '#cookie-consent']
 
309
  for tag in soup(tags_to_remove): tag.decompose()
310
  for selector in selectors_to_remove:
311
  for element in soup.select(selector): element.decompose()
312
 
313
+ main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
 
 
 
 
 
 
 
 
314
  target_element = main_content if main_content else soup.body
315
  if not target_element:
316
  logger.warning(f"[Web Scraper - Requests/BS4] Could not find a suitable target element (main, article, body) for {url}");
317
  return None
318
 
 
319
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
320
+ text = "\n\n".join(lines)
321
 
322
+ MIN_TEXT_LENGTH = 100
323
  if not text or len(text) < MIN_TEXT_LENGTH:
324
  logger.warning(f"[Web Scraper - Requests/BS4] Extracted text is too short (<{MIN_TEXT_LENGTH} chars) after cleaning for {url}. Length: {len(text)}. Content might be JS-rendered or blocked.")
325
+ return None
 
326
 
327
  logger.info(f"[Web Scraper - Requests/BS4] Successfully scraped and cleaned content from {url}. Final Length: {len(text)}")
328
  return text
 
340
  if not api_key: logger.error("[Web Scraper - URLToText API] API key is missing."); return None
341
  logger.info(f"[Web Scraper - URLToText API] Attempting fetch via API: {url}")
342
  api_endpoint = "https://urltotext.com/api/v1/urltotext/"
 
343
  payload = json.dumps({
344
  "url": url,
345
  "output_format": "text",
346
+ "extract_main_content": True,
347
+ "render_javascript": True,
348
+ "residential_proxy": False,
349
+ "timeout_render": 20000,
350
  })
351
  headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
352
  try:
353
+ response = await asyncio.to_thread(requests.post, api_endpoint, headers=headers, data=payload, timeout=60)
354
  logger.debug(f"[Web Scraper - URLToText API] Received status code {response.status_code} for {url}")
355
  if response.status_code == 200:
356
  try:
 
359
  content = content_data.get("content")
360
  credits = data.get("credits_used", "N/A")
361
  warning = content_data.get("warning")
362
+ error_msg = content_data.get("error")
363
 
364
  if warning: logger.warning(f"[Web Scraper - URLToText API] API Warning for {url}: {warning}")
365
+ if error_msg: logger.error(f"[Web Scraper - URLToText API] API Error reported for {url}: {error_msg}"); return None
366
 
367
  if content and isinstance(content, str):
368
  logger.info(f"[Web Scraper - URLToText API] Successfully fetched content via API. Length: {len(content.strip())}. Credits Used: {credits}");
 
375
  elif response.status_code == 400: logger.error(f"[Web Scraper - URLToText API] Bad Request (400) to API. Check payload/URL. Response: {response.text[:200]}...")
376
  elif response.status_code == 401: logger.error(f"[Web Scraper - URLToText API] Unauthorized (401). Check API Key. Response: {response.text[:200]}...")
377
  elif response.status_code == 402: logger.error(f"[Web Scraper - URLToText API] Payment Required (402). Check API credits/plan. Response: {response.text[:200]}...")
378
+ elif response.status_code == 422: logger.warning(f"[Web Scraper - URLToText API] Unprocessable URL / Fetch Error (422) reported by API for {url}. Response: {response.text[:200]}...")
379
  elif response.status_code == 429: logger.warning(f"[Web Scraper - URLToText API] Rate Limit Hit (429). Response: {response.text[:200]}...")
380
  elif response.status_code >= 500: logger.error(f"[Web Scraper - URLToText API] API Server Error ({response.status_code}). Response: {response.text[:200]}...")
381
  else: logger.error(f"[Web Scraper - URLToText API] Unexpected status code {response.status_code} from API. Response: {response.text[:200]}...")
382
+ return None
383
  except requests.exceptions.Timeout: logger.error(f"[Web Scraper - URLToText API] Timeout connecting to API for {url}"); return None
384
  except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error connecting to API: {e}"); return None
385
  except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error during API call: {e}", exc_info=True); return None
 
392
  if not text or not text.strip(): logger.warning("generate_summary called with empty or whitespace-only text."); return "Error: No content was provided to summarize."
393
 
394
  openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
 
395
  model_name = "deepseek/deepseek-chat:free"
 
396
 
 
397
  if summary_type == "paragraph":
398
  system_message = (
399
  "You are an expert summarization AI. Your goal is to provide a concise, easy-to-understand summary of the provided text. "
 
427
  else:
428
  logger.error(f"Invalid summary_type '{summary_type}' requested.")
429
  return f"Error: Invalid summary type ('{summary_type}') requested. Please choose 'paragraph' or 'points'."
 
430
 
431
+ MAX_INPUT_TOKENS_ESTIMATE = 28000
 
 
 
432
  AVG_CHARS_PER_TOKEN = 4
433
  MAX_INPUT_LENGTH = MAX_INPUT_TOKENS_ESTIMATE * AVG_CHARS_PER_TOKEN
434
 
 
437
  truncation_marker = "\n\n[... Text truncated due to length ...]"
438
  text = text[:MAX_INPUT_LENGTH - len(truncation_marker)] + truncation_marker
439
 
 
440
  messages = [
441
  {"role": "system", "content": system_message},
442
  {"role": "user", "content": f"{user_prompt_instruction}\n\n--- TEXT TO SUMMARIZE ---\n\n{text}\n\n--- END OF TEXT ---"}
443
  ]
444
 
445
+ space_host = os.environ.get("SPACE_HOST", "huggingface.co/spaces/YOUR_SPACE_NAME")
 
446
  referer_url = f"https://{space_host}" if space_host and not space_host.startswith("http") else space_host or "https://huggingface.co"
447
  headers = {
448
  "Authorization": f"Bearer {api_key}",
449
  "Content-Type": "application/json",
450
  "HTTP-Referer": referer_url,
451
+ "X-Title": "Telegram URL Summarizer Bot"
452
  }
453
  payload = json.dumps({"model": model_name, "messages": messages})
454
 
455
  try:
456
  logger.debug(f"Sending request to OpenRouter (Model: {model_name}). Prompt length approx: {len(text)} chars.")
 
457
  response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=120)
458
  logger.debug(f"Received status {response.status_code} from OpenRouter.")
459
 
460
  if response.status_code == 200:
461
  try:
462
  data = response.json()
 
463
  choice = data.get("choices", [{}])[0]
464
  message = choice.get("message", {})
465
  summary = message.get("content")
 
468
  if summary and isinstance(summary, str) and summary.strip():
469
  summary = summary.strip()
470
  logger.info(f"Successfully generated summary. Finish Reason: {finish_reason}. Length: {len(summary)}")
471
+ if summary_type == "paragraph" and len(summary.split()) > 95:
 
472
  logger.warning(f"Generated paragraph summary slightly longer than target word count ({len(summary.split())} words).")
473
  return summary
474
  else:
 
482
  logger.error(f"Unexpected error processing OpenRouter success response: {e}", exc_info=True)
483
  return "Sorry, an unexpected error occurred while processing the AI response."
484
 
 
485
  elif response.status_code == 401: logger.error("OpenRouter API key is invalid (Unauthorized - 401)."); return "Error: AI service authentication failed. Please check the configuration."
486
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402). Check credits/limits."); return "Sorry, there's an issue with the AI service account limits or payment."
487
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Hit (429)."); return "Sorry, the AI model is currently busy due to high demand. Please try again in a moment."
488
  elif response.status_code == 400: logger.error(f"OpenRouter Bad Request (400). Likely prompt issue. Response: {response.text[:500]}..."); return "Sorry, the request to the AI service was invalid (possibly due to the content or prompt)."
489
  elif response.status_code >= 500: logger.error(f"OpenRouter Server Error ({response.status_code}). Response: {response.text[:500]}..."); return "Sorry, the AI service is experiencing internal issues. Please try again later."
490
  else:
 
491
  logger.error(f"Unexpected HTTP status {response.status_code} from OpenRouter. Response: {response.text[:500]}...")
492
+ try:
493
  error_data = response.json()
494
  error_msg = error_data.get("error", {}).get("message", response.text[:100])
495
  return f"Sorry, the AI service returned an error ({response.status_code}): {error_msg}"
 
506
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
507
  """Handles the /start command."""
508
  user = update.effective_user
509
+ if not user: return
510
  logger.info(f"User {user.id} ({user.username or 'NoUsername'}) initiated /start.")
 
511
  mention = user.mention_html() if user.username else user.first_name
512
  start_message = (
513
  f"👋 Hello {mention}!\n\n"
 
534
  "- **Length:** Very long articles or videos might be truncated before summarization to fit within processing limits.\n\n"
535
  "Just send a link to get started!"
536
  )
 
537
  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
538
 
539
  async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 
541
  if not update.message or not update.message.text: return
542
  message_text = update.message.text.strip()
543
  user = update.effective_user
544
+ if not user: return
545
 
 
 
546
  url_pattern = r'https?://(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}(?:/[^\s]*)?'
547
  match = re.search(url_pattern, message_text)
548
 
549
  if match:
550
  url = match.group(0)
551
  logger.info(f"User {user.id} sent potential URL: {url}")
 
 
552
  context.user_data['url_to_summarize'] = url
553
  logger.debug(f"Stored URL '{url}' in user_data for user {user.id}")
554
 
 
559
  ]
560
  ]
561
  reply_markup = InlineKeyboardMarkup(keyboard)
 
 
562
  await update.message.reply_text(
563
  f"✅ Link received:\n`{url}`\n\nChoose your desired summary format:",
564
  reply_markup=reply_markup,
565
  parse_mode=ParseMode.MARKDOWN,
566
+ link_preview_options={'is_disabled': True}
567
  )
568
  else:
 
 
 
569
  if not message_text.startswith('/'):
570
  await update.message.reply_text("Please send me a valid URL (starting with http:// or https://) to summarize.")
571
 
 
575
  query = update.callback_query
576
  if not query or not query.from_user:
577
  logger.warning("Callback query or user missing in update.")
578
+ return
579
  user = query.from_user
580
 
 
581
  try:
582
+ await query.answer()
583
  logger.debug(f"Answered callback query {query.id} for user {user.id}")
584
  except TimedOut:
 
585
  logger.warning(f"Timeout answering callback query {query.id} for user {user.id}. Processing continues.")
586
  except Exception as e:
 
587
  logger.error(f"Error answering callback query {query.id} for user {user.id}: {e!r}", exc_info=True)
588
 
589
+ summary_type = query.data
 
590
  url = context.user_data.get('url_to_summarize')
591
  logger.info(f"User {user.id} chose summary type '{summary_type}'. Checking for stored URL.")
592
 
593
  if not url:
594
  logger.warning(f"User {user.id} pressed button '{summary_type}', but NO URL found in user_data context.")
595
  try:
 
596
  await query.edit_message_text(text="⚠️ Oops! I lost the context for that link. Please send the link again.")
597
  except TimedOut:
598
  logger.error(f"Timeout trying to edit message to inform user {user.id} about lost context.")
599
  except Exception as edit_err:
 
600
  logger.error(f"Failed to edit message for lost context for user {user.id}: {edit_err}")
601
+ return
602
 
 
603
  logger.info(f"Processing URL '{url}' for user {user.id} with type '{summary_type}'.")
 
604
  context.user_data.pop('url_to_summarize', None)
605
  logger.debug(f"Cleared URL from user_data for user {user.id}")
606
 
 
607
  current_openrouter_key = os.environ.get('OPENROUTER_API_KEY')
608
  current_urltotext_key = os.environ.get('URLTOTEXT_API_KEY')
609
  current_supadata_key = os.environ.get('SUPADATA_API_KEY')
610
  current_apify_token = os.environ.get('APIFY_API_TOKEN')
 
611
  keys_present = f"OR={'Y' if current_openrouter_key else 'N'}, UTT={'Y' if current_urltotext_key else 'N'}, SD={'Y' if current_supadata_key else 'N'}, AP={'Y' if current_apify_token else 'N'}"
612
  logger.debug(f"API Key check for user {user.id} request: {keys_present}")
613
 
 
614
  if not current_openrouter_key:
615
  logger.error(f"CRITICAL: OpenRouter API key is missing. Cannot generate summary for user {user.id}.")
616
  try:
 
621
  logger.error(f"Failed to edit message for missing AI key for user {user.id}: {edit_err}")
622
  return
623
 
 
624
  processing_message_text = f"⏳ Working on your '{summary_type}' summary for the link...\n_(This might take up to a minute depending on the content)_"
625
+ message_to_edit = query.message
626
+ status_message_sent = None
627
 
628
  try:
629
  if message_to_edit:
630
  await query.edit_message_text(text=processing_message_text)
631
  logger.debug(f"Edited original message {message_to_edit.message_id} to show 'Working...' status for query {query.id}")
632
  else:
 
633
  logger.warning(f"Original message (query.message) not found for query {query.id}. Cannot edit, will send new status message.")
634
+ raise ValueError("Original message object missing")
635
  except (TimedOut, Exception) as e:
 
636
  logger.warning(f"Could not edit original message {message_to_edit.message_id if message_to_edit else 'N/A'} for query {query.id}: {e!r}. Attempting to send a new status message.")
637
+ message_to_edit = None
638
  try:
639
  status_message_sent = await context.bot.send_message(chat_id=user.id, text=processing_message_text)
640
  logger.debug(f"Sent new status message {status_message_sent.message_id} to user {user.id}.")
641
  except TimedOut:
642
  logger.error(f"Timeout sending NEW 'Working...' status message to user {user.id}. Processing continues without feedback.")
 
643
  except Exception as send_err:
644
  logger.error(f"Failed sending NEW 'Working...' status message to user {user.id}: {send_err}. Processing continues without feedback.")
 
645
 
 
646
  content = None
647
+ user_feedback_message = None
648
+ success = False
649
 
650
  try:
 
651
  try:
652
  logger.debug(f"Sending 'typing' chat action to chat {user.id}")
653
  await context.bot.send_chat_action(chat_id=user.id, action='typing')
654
  except TimedOut: logger.warning(f"Timeout sending 'typing' action for user {user.id}.")
655
  except Exception as ca_err: logger.warning(f"Failed sending 'typing' action for user {user.id}: {ca_err}")
656
 
 
657
  is_yt = is_youtube_url(url)
658
  logger.debug(f"URL ({url}) is YouTube: {is_yt} (User: {user.id})")
659
 
 
671
  logger.warning(f"Failed to extract YouTube video ID from URL: {url} (User: {user.id})")
672
  user_feedback_message = "⚠️ Sorry, I couldn't identify a valid YouTube video ID in the link you provided."
673
  else:
 
674
  logger.info(f"Attempting website scrape (Requests/BS4) for URL: {url} (User: {user.id})")
675
  content = await get_website_content_via_requests(url)
676
  if content:
677
  logger.info(f"Website scrape successful (Requests/BS4). Length: {len(content)} (User: {user.id})")
 
678
  else:
679
  logger.warning(f"Primary website scrape failed for {url} (User: {user.id}). Trying fallback API.")
680
  if current_urltotext_key:
 
681
  try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before fallback scrape.")
682
+ except: pass
683
 
684
  logger.info(f"Attempting website scrape via URLToText API for: {url} (User: {user.id})")
685
  content = await get_website_content_via_urltotext_api(url, current_urltotext_key)
 
689
  logger.warning(f"Fallback website scrape (URLToText API) also failed for {url} (User: {user.id}).")
690
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website using available methods. It might be protected or structured in a way I can't parse."
691
  else:
 
692
  logger.warning(f"Primary scrape failed and URLToText API key not configured. Cannot fallback for {url} (User: {user.id}).")
693
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website, and the fallback service isn't configured."
694
 
 
695
  if content:
696
  logger.info(f"Content fetched (Length: {len(content)}). Generating '{summary_type}' summary for user {user.id}.")
 
697
  try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before AI summary generation.")
698
  except: pass
699
 
700
  summary = await generate_summary(content, summary_type, current_openrouter_key)
701
 
 
702
  if summary.startswith("Error:") or summary.startswith("Sorry,"):
703
  logger.warning(f"AI summary generation failed for user {user.id}. Reason: {summary}")
704
+ user_feedback_message = f"⚠️ {summary}"
705
  else:
 
706
  logger.info(f"Summary generated successfully for user {user.id}. Length: {len(summary)}. Sending result.")
707
  try:
708
  await context.bot.send_message(
709
  chat_id=user.id,
710
  text=summary,
711
+ parse_mode=ParseMode.MARKDOWN,
712
  link_preview_options={'is_disabled': True}
713
  )
714
  success = True
715
+ user_feedback_message = None
716
  logger.info(f"Successfully sent summary to user {user.id}.")
717
  except TimedOut:
718
  logger.error(f"Timeout sending final summary message to user {user.id}.")
719
  user_feedback_message = "⚠️ Sorry, there was a timeout while trying to send you the final summary."
720
+ success = False
721
  except Exception as send_final_err:
722
  logger.error(f"Failed sending final summary to user {user.id}: {send_final_err}", exc_info=True)
723
  user_feedback_message = "⚠️ Sorry, an unexpected error occurred while sending the final summary."
724
+ success = False
725
 
726
  elif not user_feedback_message:
 
727
  logger.warning(f"Content retrieval resulted in None, but no specific user feedback message was set. URL: {url} (User: {user.id})")
728
  user_feedback_message = "⚠️ Sorry, I couldn't retrieve any usable content from the link provided."
729
 
 
730
  if user_feedback_message and not success:
731
  logger.warning(f"Processing failed or summary sending failed for user {user.id}. Sending feedback: {user_feedback_message}")
732
  try:
 
737
  logger.error(f"Failed sending final FAILURE feedback message to user {user.id}: {send_feedback_err}")
738
 
739
  except Exception as e:
 
740
  logger.error(f"Unexpected critical error during callback processing for user {user.id}, URL {url}: {e}", exc_info=True)
741
  try:
 
742
  await context.bot.send_message(chat_id=user.id, text="❌ Oops! An unexpected internal error occurred while processing your request. The issue has been logged.")
743
  except TimedOut:
744
  logger.error(f"Timeout sending CRITICAL internal error feedback message to user {user.id}.")
745
  except Exception as final_err:
 
746
  logger.error(f"Failed sending CRITICAL internal error feedback message to user {user.id}: {final_err}")
 
747
  success = False
748
 
749
  finally:
 
750
  logger.debug(f"Cleaning up status message(s) for user {user.id}, query {query.id}. Success={success}")
751
  try:
752
  if status_message_sent:
 
 
753
  await context.bot.delete_message(chat_id=user.id, message_id=status_message_sent.message_id)
754
  logger.debug(f"Deleted separate status message {status_message_sent.message_id} for user {user.id}.")
755
  elif message_to_edit:
 
756
  if success:
 
757
  await query.delete_message()
758
  logger.debug(f"Processing succeeded. Deleted original (edited) message {message_to_edit.message_id} for query {query.id}.")
759
  else:
 
 
 
 
 
760
  logger.debug(f"Processing failed. Leaving edited message {message_to_edit.message_id} in place for query {query.id}.")
 
 
 
 
 
 
761
 
762
  except TimedOut:
763
  logger.warning(f"Timeout attempting to delete status/button message for user {user.id}, query {query.id}.")
764
  except Exception as del_e:
 
 
765
  logger.warning(f"Could not delete status/button message for user {user.id}, query {query.id}: {del_e!r}")
766
 
 
767
  logger.info(f"Finished handling callback query {query.id} for user {user.id}. Overall Success: {success}")
768
 
769
 
770
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
771
  """Log Errors caused by Updates."""
772
  logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
 
773
  if isinstance(context.error, TimedOut):
774
  logger.warning("A timeout error occurred in PTB communication.")
775
  elif isinstance(context.error, NetworkError):
776
  logger.warning(f"A network error occurred: {context.error}")
 
777
 
778
+ # --- Bot Setup Function (Corrected: Removed invalid 'limits' param for PTB v20) ---
779
  async def setup_bot_config() -> Application:
780
+ """Configures the PTB Application with custom HTTPX settings for PTB v20.x."""
781
  logger.info("Configuring Telegram Application...")
782
  if not TELEGRAM_TOKEN:
783
  logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
784
  raise ValueError("TELEGRAM_TOKEN environment variable not set.")
785
 
786
+ # --- Configure HTTPX client settings (Timeouts ONLY for PTB v20) ---
787
  connect_timeout = 10.0 # Slightly higher connect timeout
 
788
  read_timeout = 30.0 # Increased timeout for reading response
789
  write_timeout = 30.0 # Increased timeout for sending request
790
  pool_timeout = 30.0 # Increased timeout for getting connection from pool
791
+ # NOTE: no pool size is passed here. PTB v20.x HTTPXRequest does accept `connection_pool_size`;
792
+ # when it is omitted PTB applies its own default (documented as 1), not the httpx default of 10.
793
 
794
+ logger.info(f"Creating PTB HTTPXRequest (v20 compatible) with settings: "
795
  f"connect_timeout={connect_timeout}, read_timeout={read_timeout}, "
796
+ f"write_timeout={write_timeout}, pool_timeout={pool_timeout}. "
797
+ f"(Pool size uses httpx default)")
 
 
 
 
 
 
 
798
 
799
+ # Create a custom request object with ONLY the supported timeout parameters
800
  custom_request = HTTPXRequest(
801
  connect_timeout=connect_timeout,
802
  read_timeout=read_timeout,
803
  write_timeout=write_timeout,
804
  pool_timeout=pool_timeout,
805
+ # REMOVED: limits=custom_limits, <<<--- This was the error
806
+ http_version="1.1"
807
  )
808
 
809
  # Use Application.builder() and pass the custom request object
810
  application_builder = Application.builder().token(TELEGRAM_TOKEN)
811
  application_builder.request(custom_request)
812
+ # application_builder.get_updates_request(custom_request) # Apply if using polling
 
 
 
813
 
814
  # Build the application instance
815
  application = application_builder.build()
 
817
  # --- Register Handlers ---
818
  application.add_handler(CommandHandler("start", start))
819
  application.add_handler(CommandHandler("help", help_command))
 
820
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
 
821
  application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
 
822
  application.add_error_handler(error_handler)
823
 
824
  logger.info("Telegram application handlers configured.")
 
830
  """Handles PTB startup and shutdown during ASGI lifespan."""
831
  global ptb_app
832
  logger.info("ASGI Lifespan: Startup sequence initiated...")
 
833
 
834
  try:
 
835
  ptb_app = await setup_bot_config()
836
  logger.info("PTB Application object configured. Initializing...")
837
+ await ptb_app.initialize()
838
  logger.info("PTB Application initialized. Starting background tasks (e.g., job queue)...")
 
839
  await ptb_app.start()
840
+ if ptb_app.updater: ptb_app.updater.stop() # Ensure polling is stopped
841
  bot_instance = ptb_app.bot
842
  bot_info = await bot_instance.get_me()
843
  logger.info(f"PTB Application started successfully. Bot ID: {bot_info.id}, Username: @{bot_info.username}")
844
 
 
 
845
  WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
846
  if WEBHOOK_URL_BASE:
 
847
  if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
848
+ webhook_path = "/webhook"
849
  full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
850
 
851
  logger.info(f"Attempting to set Telegram webhook to: {full_webhook_url}")
 
852
  await asyncio.sleep(2.0)
853
  try:
 
854
  await bot_instance.set_webhook(
855
  url=full_webhook_url,
856
+ allowed_updates=Update.ALL_TYPES,
 
 
857
  )
 
858
  webhook_info = await bot_instance.get_webhook_info()
859
  if webhook_info.url == full_webhook_url:
860
  logger.info(f"Telegram webhook set successfully! Current info: {webhook_info}")
861
  else:
862
  logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got: {webhook_info.url}. Info: {webhook_info}")
863
  except RetryAfter as e:
 
864
  logger.warning(f"Webhook setting throttled by Telegram (RetryAfter: {e.retry_after}s). Another instance likely succeeded or try again later.")
 
865
  await asyncio.sleep(e.retry_after or 2)
866
  webhook_info = await bot_instance.get_webhook_info()
867
  logger.info(f"Webhook info after RetryAfter delay: {webhook_info}")
 
875
 
876
  except Exception as startup_err:
877
  logger.critical(f"CRITICAL ERROR during ASGI application startup: {startup_err}", exc_info=True)
 
878
  raise
879
  finally:
 
880
  logger.info("ASGI Lifespan: Shutdown sequence initiated...")
881
  if ptb_app:
882
  bot_username = ptb_app.bot.username if ptb_app.bot else "N/A"
883
  logger.info(f"PTB App instance found for @{bot_username}. Checking if running...")
 
 
884
  is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
885
  if is_running:
886
  try:
887
  logger.info("Stopping PTB Application's background tasks...")
888
+ await ptb_app.stop()
889
  logger.info("Shutting down PTB Application connections and resources...")
890
+ await ptb_app.shutdown()
891
  logger.info("PTB Application shut down gracefully.")
892
  except Exception as shutdown_err:
893
  logger.error(f"Error during PTB Application shutdown: {shutdown_err}", exc_info=True)
894
  else:
895
  logger.warning("PTB Application instance exists but was not marked as running at shutdown.")
 
896
  try: await ptb_app.shutdown()
897
  except Exception: logger.error("Error during shutdown of non-running PTB app.", exc_info=True)
898
  else:
 
901
 
902
 
903
  # --- Flask App Setup (for Webhook Route) ---
 
 
904
  flask_core_app = Flask(__name__)
905
  logger.info("Core Flask app instance created (used by Starlette for routing).")
906
 
 
911
  logger.debug("Health check endpoint '/' accessed.")
912
  bot_status = "Unknown / Not Initialized"
913
  if ptb_app and ptb_app.bot:
 
914
  is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
915
  bot_status = f"Running (@{ptb_app.bot.username})" if is_running else f"Initialized/Stopped (@{ptb_app.bot.username})"
916
  return f"Telegram Bot Summarizer - Status: {bot_status} - Listening via Starlette/Uvicorn."
 
918
  @flask_core_app.route('/webhook', methods=['POST'])
919
  async def webhook() -> Response:
920
  """Webhook endpoint called by Telegram."""
921
+ global ptb_app
922
 
923
  if not ptb_app:
924
  logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None. Lifespan likely failed.")
 
925
  return Response('Bot service is not configured or failed during startup.', status=503)
926
 
 
927
  is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
928
  if not is_running:
929
  logger.error("Webhook triggered, but PTB Application is not currently running.")
 
930
  return Response('Bot service is initialized but not actively running.', status=503)
931
 
 
932
  logger.debug("Webhook endpoint received POST request from Telegram.")
933
  try:
 
934
  update_data = await request.get_json()
935
  if not update_data:
936
  logger.warning("Received empty or non-JSON data on webhook.")
937
  return Response('Bad Request: Expected JSON payload.', status=400)
938
 
 
939
  update = Update.de_json(update_data, ptb_app.bot)
940
  logger.debug(f"Processing update_id: {update.update_id} via webhook route.")
 
 
 
941
  await ptb_app.process_update(update)
 
942
  logger.debug(f"Finished processing update_id: {update.update_id}")
 
943
  return Response('ok', status=200)
944
 
945
  except json.JSONDecodeError:
946
  logger.error("Failed to decode JSON from Telegram webhook request.", exc_info=True)
947
  return Response('Bad Request: Invalid JSON format.', status=400)
948
  except Exception as e:
 
949
  logger.error(f"Error processing update in webhook handler: {e}", exc_info=True)
 
 
950
  return Response('Internal Server Error processing update.', status=500)
951
 
952
 
953
  # --- Create Starlette ASGI Application ---
 
954
  app = Starlette(
955
+ debug=False,
956
+ lifespan=lifespan,
957
  routes=[
 
 
958
  Mount("/", app=WSGIMiddleware(flask_core_app))
959
  ]
960
  )
 
962
 
963
 
964
  # --- Development Server Execution Block ---
 
 
 
965
  if __name__ == '__main__':
966
  logger.warning("=" * 50)
967
  logger.warning(" RUNNING SCRIPT DIRECTLY (using __main__) ".center(50, "="))
 
977
  if not TELEGRAM_TOKEN:
978
  logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable missing. Aborting direct Flask start.")
979
  else:
 
980
  local_port = int(os.environ.get('PORT', 8080))
981
  logger.info(f"Starting Flask development server on http://0.0.0.0:{local_port}")
 
 
982
  flask_core_app.run(host='0.0.0.0', port=local_port, debug=True, use_reloader=False)