|
|
|
|
|
import os |
|
|
import re |
|
|
import logging |
|
|
import asyncio |
|
|
import json |
|
|
import html |
|
|
import contextlib |
|
|
import traceback |
|
|
from typing import Optional, Dict, Any |
|
|
|
|
|
|
|
|
from starlette.applications import Starlette |
|
|
from starlette.routing import Route |
|
|
from starlette.responses import PlainTextResponse, JSONResponse, Response |
|
|
from starlette.requests import Request |
|
|
|
|
|
|
|
|
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot |
|
|
from telegram.ext import ( |
|
|
Application, |
|
|
CommandHandler, |
|
|
MessageHandler, |
|
|
filters, |
|
|
ContextTypes, |
|
|
CallbackQueryHandler, |
|
|
) |
|
|
from telegram.constants import ParseMode |
|
|
from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError |
|
|
from telegram.request import HTTPXRequest, BaseRequest |
|
|
|
|
|
|
|
|
import httpx |
|
|
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound |
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup |
|
|
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log |
|
|
|
|
|
# Prefer the faster C-based lxml parser for BeautifulSoup when it is
# installed; otherwise fall back to the stdlib html.parser.
try:
    import lxml
    DEFAULT_PARSER = 'lxml'
except ImportError:
    DEFAULT_PARSER = 'html.parser'
|
|
|
|
|
|
|
|
# Apify support is optional: only import the client when an API token is
# present in the environment, so the package is not required otherwise.
_apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
if _apify_token_exists:
    from apify_client import ApifyClient
    from apify_client.consts import ActorJobStatus
    from apify_client.errors import ApifyApiError
else:
    # Placeholders so later references to these names don't raise NameError
    # (code paths that use them are guarded by `if ApifyClient`-style checks).
    ApifyClient = None
    ApifyApiError = None
|
|
|
|
|
|
|
|
|
|
|
# --- Logging configuration ---------------------------------------------
# Root config plus per-library levels: noisy HTTP/client libraries are
# capped at WARNING; framework loggers stay at INFO for visibility.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logging.getLogger("httpx").setLevel(logging.WARNING)
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
logging.getLogger("telegram.ext").setLevel(logging.INFO)
logging.getLogger('telegram.bot').setLevel(logging.INFO)
logging.getLogger("urllib3").setLevel(logging.INFO)
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
logging.getLogger('uvicorn').setLevel(logging.INFO)
logging.getLogger('starlette').setLevel(logging.INFO)
logger = logging.getLogger(__name__)
logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")

# Global PTB Application instance; populated during ASGI lifespan startup.
ptb_app: Optional[Application] = None

logger.info("Attempting to load secrets...")
|
|
def get_secret(secret_name: str) -> Optional[str]:
    """Read a secret from the environment, logging only whether it was found.

    Args:
        secret_name: Name of the environment variable holding the secret.

    Returns:
        The secret's value, or None when the variable is unset/empty.

    Security fix: the previous version logged the first 8 characters of each
    secret value. Even a prefix of an API token in application logs is a
    credential-exposure risk, so no part of the value is logged anymore.
    """
    value = os.environ.get(secret_name)
    status = "Found" if value else "Not Found"
    logger.info(f"Secret '{secret_name}': {status}")
    return value
|
|
|
|
|
# --- Configuration from environment ------------------------------------
# TELEGRAM_TOKEN is mandatory (setup_bot_config raises without it);
# the remaining keys enable optional providers and degrade gracefully.
TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
# Model/actor choices are overridable via environment, with sane defaults.
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "anthropic/claude-3.5-sonnet")
APIFY_ACTOR_NAME = os.environ.get("APIFY_ACTOR_NAME", "pocesar/youtube-scraper")
logger.info("Secret loading attempt finished.")
logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
if _apify_token_exists:
    logger.info(f"Using Apify Actor: {APIFY_ACTOR_NAME}")
|
|
|
|
|
|
|
|
|
|
|
@retry(
    stop=stop_after_attempt(4),
    wait=wait_exponential(multiplier=1, min=2, max=15),
    retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
    before_sleep=before_sleep_log(logger, logging.WARNING),
    reraise=True
)
async def retry_bot_operation(func, *args, **kwargs):
    """Wrapper to retry bot operations with exponential backoff.

    Awaits ``func(*args, **kwargs)``; transient Telegram errors are re-raised
    so the tenacity decorator retries them. Known-harmless BadRequest
    messages are swallowed and mapped to None instead.
    """
    # BadRequest message fragments that indicate a harmless condition.
    ignorable_fragments = (
        "message is not modified",
        "query is too old",
        "message to edit not found",
        "chat not found",
        "bot was blocked by the user",
    )
    try:
        return await func(*args, **kwargs)
    except BadRequest as e:
        lowered = str(e).lower()
        if any(fragment in lowered for fragment in ignorable_fragments):
            logger.warning(f"Ignoring non-critical BadRequest during bot operation: {e}")
            return None
        logger.error(f"Potentially critical BadRequest during bot operation: {e}")
        raise
    except TelegramError as e:
        # Propagate so tenacity can decide whether this type is retryable.
        logger.warning(f"TelegramError during bot operation (will retry if applicable): {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error during bot operation: {e}", exc_info=True)
        raise
|
|
|
|
|
|
|
|
|
|
|
def is_youtube_url(url):
    """Checks if the URL is a valid YouTube video or shorts URL."""
    pattern = (
        r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
        r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
        r'([\w-]{11})'
        r'(?:\S+)?'
    )
    found = re.search(pattern, url, re.IGNORECASE) is not None
    logger.debug(f"is_youtube_url check for '{url}': {'Match found' if found else 'No match'}")
    return found
|
|
|
|
|
def extract_youtube_id(url):
    """Extracts the YouTube video ID from a URL.

    Returns the 11-character video ID, or None when no ID can be found.
    """
    pattern = re.compile(
        r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
        r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
        r'([\w-]{11})'
        r'(?:\S+)?',
        re.IGNORECASE)
    found = pattern.search(url)
    if not found:
        logger.warning(f"Could not extract YouTube ID from URL: {url}")
        return None
    video_id = found.group(1)
    logger.debug(f"Extracted YouTube ID '{video_id}' from URL: {url}")
    return video_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def fetch_url_content(url: str, timeout: int = 20) -> Optional[str]:
    """Fetches content from a URL using httpx asynchronously.

    Args:
        url: Absolute URL to fetch (redirects are followed).
        timeout: Per-request timeout in seconds.

    Returns:
        The decoded response body, or None on any network/HTTP/decoding error.
    """
    # Browser-like headers: some sites reject requests with a default UA.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
    }
    try:
        async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers, http2=True) as client:
            response = await client.get(url)
            response.raise_for_status()  # turn 4xx/5xx into HTTPStatusError
            try:
                # Let httpx decode using its detected charset.
                content = response.text
                logger.debug(f"Detected encoding for {url}: {response.encoding}")
                return content
            except UnicodeDecodeError:
                logger.warning(f"UnicodeDecodeError for {url} with encoding {response.encoding}. Trying raw bytes with utf-8.")
                # Last resort: lossy UTF-8 decode of the raw bytes.
                return response.content.decode('utf-8', errors='ignore')
            except Exception as e:
                logger.error(f"Error decoding response for {url}: {e}")
                return None

    except httpx.HTTPStatusError as e:
        logger.error(f"HTTP error fetching {url}: {e.response.status_code} - {e}")
    except httpx.ConnectError as e:
        logger.error(f"Connection error fetching {url}: {e}")
    except httpx.TimeoutException as e:
        logger.error(f"Timeout error fetching {url}: {e}")
    except httpx.RequestError as e:
        logger.error(f"Request error fetching {url}: {e}")
    except Exception as e:
        logger.error(f"Unexpected error fetching {url}: {e}", exc_info=True)
    return None
|
|
|
|
|
|
|
|
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
    """Fetches YouTube transcript using Supadata API.

    Returns the transcript segments joined into a single string, or None
    when the key is missing, the response shape is unexpected, or any
    request error occurs (all errors are logged, never raised).
    """
    if not api_key: return None
    api_url = f"https://api.supadata.net/youtube/transcript?video_id={video_id}"
    headers = {'X-API-Key': api_key, 'Accept': 'application/json'}
    logger.info(f"Attempting transcript fetch via Supadata for {video_id}")
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(api_url, headers=headers)
            response.raise_for_status()
            data = response.json()
            # Expected shape: a non-empty list of segment dicts with "text" keys.
            if data and isinstance(data, list) and data[0].get("text"):
                transcript = " ".join([item["text"] for item in data if "text" in item])
                logger.info(f"Supadata transcript fetched successfully for {video_id} (length: {len(transcript)})")
                return transcript
            else:
                logger.warning(f"Supadata response format unexpected or empty for {video_id}: {data}")
                return None
    except httpx.ConnectError as e:
        # Separate TLS-verification failures from ordinary connection errors.
        if "CERTIFICATE_VERIFY_FAILED" in str(e):
            logger.error(f"Supadata API SSL certificate verification failed for {video_id}: {e}. This is likely an issue with api.supadata.net's certificate.")
        else:
            logger.error(f"Supadata API connection error for {video_id}: {e}")
    except httpx.HTTPStatusError as e:
        logger.error(f"Supadata API HTTP error for {video_id}: {e.response.status_code} - {e}")
    except Exception as e:
        logger.error(f"Error fetching transcript via Supadata for {video_id}: {e}", exc_info=True)
    return None
|
|
|
|
|
async def get_transcript_via_apify(video_id: str, api_token: str) -> Optional[str]:
    """Fetches YouTube transcript using Apify YouTube Scraper Actor.

    The blocking apify_client calls are run in worker threads via
    asyncio.to_thread so the event loop is not stalled. Returns the
    transcript text, or None on any failure (all errors are logged).
    """
    global APIFY_ACTOR_NAME
    if not ApifyClient or not api_token: return None
    logger.info(f"Attempting transcript fetch via Apify (Actor: {APIFY_ACTOR_NAME}) for {video_id}")
    try:
        client = ApifyClient(api_token)
        actor = client.actor(APIFY_ACTOR_NAME)
        if not actor:
            logger.error(f"Could not find Apify actor: {APIFY_ACTOR_NAME}")
            return None

        # Start the actor and wait (up to 120s) for the run to finish.
        actor_run = await asyncio.to_thread(
            actor.call,
            run_input={
                "startUrls": [{"url": f"https://www.youtube.com/watch?v={video_id}"}],
                "maxResultStreams": 0,
                "maxResults": 1,
                "maxResultCommentStreams": 0,
                "proxyConfiguration": {"useApifyProxy": True},
                "subtitles": True,  # transcript is delivered via subtitles
                "maxDurationMinutes": 0,
                "skipComments": True,
            },
            timeout_secs=120,
            wait_secs=120
        )

        if not actor_run or 'defaultDatasetId' not in actor_run:
            logger.warning(f"Apify actor run did not return expected dataset ID for {video_id}. Run details: {actor_run}")
            return None

        logger.info(f"Apify actor run started/retrieved for {video_id}. Dataset ID: {actor_run['defaultDatasetId']}")

        dataset = client.dataset(actor_run["defaultDatasetId"])
        dataset_page = await asyncio.to_thread(dataset.list_items, limit=5)

        if dataset_page and dataset_page.items:
            for item in dataset_page.items:
                # Some actor outputs expose a plain 'transcript' field; others
                # nest the text under 'subtitles' (list-of-lines or raw string).
                transcript_text = item.get('transcript')
                if not transcript_text and 'subtitles' in item:
                    if isinstance(item['subtitles'], list) and len(item['subtitles']) > 0:
                        transcript_text = " ".join(line.get('text', '') for line in item['subtitles'][0].get('lines', []))
                    elif isinstance(item['subtitles'], str):
                        transcript_text = item['subtitles']

                if transcript_text and isinstance(transcript_text, str) and transcript_text.strip():
                    logger.info(f"Apify transcript fetched successfully for {video_id} (length: {len(transcript_text)})")
                    return transcript_text.strip()

            logger.warning(f"Apify run completed for {video_id}, but no transcript found in dataset items.")
        else:
            logger.warning(f"Apify run completed for {video_id}, but dataset was empty or inaccessible.")

    except ApifyApiError as e:
        logger.error(f"Apify API error fetching transcript for {video_id} (Actor: {APIFY_ACTOR_NAME}): {e}")
    except Exception as e:
        logger.error(f"Unexpected error fetching transcript via Apify for {video_id}: {e}", exc_info=True)
    return None
|
|
|
|
|
|
|
|
async def get_youtube_transcript(video_id: str, url: str, supadata_key: Optional[str], apify_token: Optional[str]) -> Optional[str]:
    """Tries different methods to get a YouTube transcript.

    Order of attempts: Supadata API (when a key is configured), then the
    youtube-transcript-api library, then the Apify actor (when a token is
    configured). Returns the first transcript found, or None.
    """
    # 1) Supadata API.
    if supadata_key:
        result = await get_transcript_via_supadata(video_id, supadata_key)
        if result:
            return result

    # 2) youtube-transcript-api; it is blocking, so run it in a thread.
    logger.info(f"Attempting transcript fetch via youtube-transcript-api for {video_id}")
    try:
        segments = await asyncio.to_thread(YouTubeTranscriptApi.get_transcript, video_id)
        joined = " ".join(segment['text'] for segment in segments)
        logger.info(f"youtube-transcript-api transcript fetched successfully for {video_id} (length: {len(joined)})")
        return joined
    except (TranscriptsDisabled, NoTranscriptFound):
        logger.warning(f"Transcripts disabled or unavailable via youtube-transcript-api for {video_id}.")
    except Exception as e:
        logger.error(f"Error using youtube-transcript-api for {video_id}: {e}")

    # 3) Apify actor as a last resort.
    if apify_token:
        result = await get_transcript_via_apify(video_id, apify_token)
        if result:
            return result

    logger.warning(f"Failed to retrieve transcript for YouTube video {video_id} using all available methods.")
    return None
|
|
|
|
|
async def get_website_content_via_requests(url: str) -> Optional[str]:
    """Fetches and extracts main text content from a website using BeautifulSoup.

    Returns the extracted text when it looks substantive (> 100 chars);
    otherwise None so callers can fall back to another extractor.
    """
    logger.info(f"Attempting website scrape via requests/BeautifulSoup for: {url}")
    html_content = await fetch_url_content(url)
    if not html_content:
        return None

    try:
        # Parsing is CPU-bound; defined here so it can run in a worker thread.
        def parse_html(content):
            soup = BeautifulSoup(content, DEFAULT_PARSER)
            # Drop boilerplate / non-content elements before extracting text.
            for script_or_style in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "iframe"]):
                script_or_style.decompose()

            # Prefer semantic containers; fall back to <body>, then whole doc.
            main_content = soup.find('article') or soup.find('main') or soup.body
            if not main_content: main_content = soup

            text = main_content.get_text(separator='\n', strip=True)
            # NOTE(review): splitting on a single space puts each word on its
            # own line; the classic BeautifulSoup cleanup recipe splits on a
            # double space ("  ") — confirm which is intended here.
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            text = '\n'.join(chunk for chunk in chunks if chunk)
            return text

        text_content = await asyncio.to_thread(parse_html, html_content)

        # Heuristic: under ~100 chars is treated as a failed extraction.
        if text_content and len(text_content) > 100:
            logger.info(f"Successfully scraped content via requests/BeautifulSoup for {url} (length: {len(text_content)})")
            return text_content
        else:
            logger.warning(f"Scraping via requests/BeautifulSoup for {url} yielded minimal content (length: {len(text_content) if text_content else 0}).")
            return None
    except Exception as e:
        logger.error(f"Error parsing website content with BeautifulSoup for {url}: {e}", exc_info=True)
        return None
|
|
|
|
|
async def get_website_content_via_urltotext_api(url: str, api_key: str) -> Optional[str]:
    """Fetches website content using the UrlToText API.

    Returns the extracted text on success, or None when the key is absent,
    the API reports no text, or any request error occurs.
    """
    if not api_key:
        return None

    api_endpoint = "https://api.urltotext.ai/text"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {"url": url, "text_only": True}
    logger.info(f"Attempting website content fetch via UrlToText API for: {url}")

    try:
        async with httpx.AsyncClient(timeout=45.0) as client:
            response = await client.post(api_endpoint, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()
            content = data.get("text")
            if content:
                logger.info(f"Successfully fetched content via UrlToText API for {url} (length: {len(content)})")
                return content
            logger.warning(f"UrlToText API response did not contain text for {url}. Response: {data}")
            return None
    except httpx.ConnectError as e:
        logger.error(f"UrlToText API connection error for {url}: {e}. Check network/DNS.")
    except httpx.HTTPStatusError as e:
        logger.error(f"UrlToText API HTTP error for {url}: {e.response.status_code} - {e}")
    except Exception as e:
        logger.error(f"Error fetching content via UrlToText API for {url}: {e}", exc_info=True)
    return None
|
|
|
|
|
|
|
|
async def generate_summary(content: str, summary_type: str, api_key: Optional[str]) -> str:
    """Generates a summary using OpenRouter API.

    Args:
        content: Text to summarize (truncated to a safe length).
        summary_type: "paragraph" for prose; anything else yields bullets.
        api_key: OpenRouter API key.

    Returns:
        The Markdown-escaped summary, or a human-readable failure message
        prefixed with "Error:" / "Sorry," — callers test for those prefixes.
    """
    global OPENROUTER_MODEL
    if not api_key:
        return "Error: OpenRouter API key is not configured."
    if not content:
        return "Error: No content provided to summarize."

    if len(content) < 50:
        return "The provided content is too short to summarize effectively."

    # Cap the prompt size; very long inputs are truncated, not rejected.
    max_chars = 100000
    if len(content) > max_chars:
        logger.warning(f"Content length ({len(content)}) exceeds max_chars ({max_chars}), truncating.")
        content = content[:max_chars]

    prompt_template = """
Please summarize the following text. The summary should capture the key points and main ideas accurately and concisely.
Provide the summary in {format_style} format.

Text to summarize:
---
{text}
---

Summary ({format_style}):
"""
    format_style = "a concise paragraph" if summary_type == "paragraph" else "bullet points (using * or - for each point)"
    prompt = prompt_template.format(text=content, format_style=format_style)

    logger.info(f"Sending request to OpenRouter (Model: {OPENROUTER_MODEL}) for {summary_type} summary.")

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                url="https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": OPENROUTER_MODEL,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": 1024,
                },
            )
            response.raise_for_status()
            data = response.json()

            if data.get("choices") and len(data["choices"]) > 0:
                summary = data["choices"][0].get("message", {}).get("content", "").strip()
                if summary:
                    logger.info(f"Summary generated successfully (length: {len(summary)})")
                    # Escape Markdown control characters so Telegram's parser
                    # doesn't choke on model output.
                    # NOTE(review): this also escapes '*', which neutralises the
                    # bullet markers the prompt requests — confirm intended.
                    summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
                    return summary
                else:
                    logger.error("OpenRouter response successful, but summary content is empty.")
                    return "Sorry, the AI generated an empty summary. Please try again."
            else:
                # No choices: surface the API's error object if present.
                error_details = data.get("error")
                logger.error(f"OpenRouter response format unexpected or error: {error_details or data}")
                return f"Sorry, I received an unexpected response or error from the summarization service: {error_details}"

    except httpx.HTTPStatusError as e:
        error_body = ""
        try: error_body = e.response.text
        except Exception: pass
        logger.error(f"OpenRouter API HTTP error: {e.response.status_code} - {e}. Response body: {error_body}")
        return f"Sorry, there was an error communicating with the summarization service (HTTP {e.response.status_code})."
    except Exception as e:
        logger.error(f"Error generating summary via OpenRouter: {e}", exc_info=True)
        return "Sorry, an unexpected error occurred while generating the summary."
|
|
|
|
|
|
|
|
|
|
|
async def process_summary_task(
    user_id: int,
    chat_id: int,
    message_id_to_edit: Optional[int],
    url: str,
    summary_type: str,
    bot_token: str
) -> None:
    """Handles the actual fetching and summarization in a background task.

    Runs detached from the webhook request (see handle_summary_type_callback),
    so it builds its own Bot instance with its own HTTPX request object.
    All outcomes — summary, user-facing error, or critical failure — are
    delivered to the chat; nothing is raised to the caller.
    """
    task_id = f"{user_id}-{message_id_to_edit or 'new'}"
    logger.info(f"[Task {task_id}] Starting processing for URL: {url}")

    # Dedicated Bot + request object for this task; the webhook handler's
    # bot cannot be shared with a detached task's lifetime.
    background_request: Optional[BaseRequest] = None
    bot: Optional[Bot] = None
    try:
        background_request = HTTPXRequest(
            connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0, http_version="1.1"
        )
        bot = Bot(token=bot_token, request=background_request)
    except Exception as e:
        logger.critical(f"[Task {task_id}] Failed to create background bot instance: {e}", exc_info=True)
        # Without a bot there is no way to notify the user; just abort.
        return

    content = None
    user_feedback_message = None
    success = False
    final_summary = ""
    status_message_id = message_id_to_edit  # message showing "working..." status

    try:
        # --- Stage 1: show a status message (edit the button message, or send new) ---
        processing_message_text = f"⏳ Working on your '{summary_type}' summary for:\n`{url}`\n\n_(Fetching & summarizing...)_"
        if status_message_id:
            try:
                await retry_bot_operation(
                    bot.edit_message_text, chat_id=chat_id, message_id=status_message_id,
                    text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None
                )
                logger.debug(f"[Task {task_id}] Successfully edited message {status_message_id} to 'Processing'")
            except Exception as e:
                logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Will send a new status message.")
                status_message_id = None
        if not status_message_id:
            try:
                status_message = await retry_bot_operation(
                    bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN
                )
                if status_message:
                    status_message_id = status_message.message_id
                    logger.debug(f"[Task {task_id}] Sent new status message {status_message_id}")
                else:
                    logger.error(f"[Task {task_id}] Failed to send new status message after retries.")
                    raise RuntimeError("Failed to send initial status message")
            except Exception as e:
                logger.error(f"[Task {task_id}] Failed to send new status message: {e}")
                raise RuntimeError("Failed to send initial status message") from e

        # --- Stage 2: fetch content (YouTube transcript or website text) ---
        try:
            await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
            is_yt = is_youtube_url(url)
            logger.debug(f"[Task {task_id}] URL is YouTube: {is_yt}")
            if is_yt:
                video_id = extract_youtube_id(url)
                if video_id:
                    logger.info(f"[Task {task_id}] Fetching YouTube transcript for {video_id}")
                    content = await get_youtube_transcript(video_id, url, SUPADATA_API_KEY, APIFY_API_TOKEN)
                    if not content: user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be disabled or unavailable."
                else: user_feedback_message = "⚠️ Couldn't extract a valid YouTube video ID from the link."
            else:
                logger.info(f"[Task {task_id}] Attempting website scrape for: {url}")
                content = await get_website_content_via_requests(url)
                # Fall back to the UrlToText API when basic scraping fails.
                if not content and URLTOTEXT_API_KEY:
                    logger.info(f"[Task {task_id}] Basic scrape failed/insufficient, trying UrlToText API...")
                    await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
                    content = await get_website_content_via_urltotext_api(url, URLTOTEXT_API_KEY)
                if not content: user_feedback_message = "⚠️ Sorry, I couldn't fetch or extract meaningful content from that website."

            # --- Stage 3: summarize ---
            if content:
                logger.info(f"[Task {task_id}] Content fetched (length: {len(content)}). Generating '{summary_type}' summary.")
                await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
                final_summary = await generate_summary(content, summary_type, OPENROUTER_API_KEY)
                # generate_summary signals failure via these message prefixes.
                if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
                    user_feedback_message = f"⚠️ {final_summary}"
                else: success = True

        except Exception as e:
            logger.error(f"[Task {task_id}] Error during content fetching or summarization: {e}", exc_info=True)
            user_feedback_message = "❌ An unexpected error occurred while processing your request."

        # --- Stage 4: deliver result (split to respect Telegram's 4096-char limit) ---
        if success and final_summary:
            max_length = 4096
            summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
            await retry_bot_operation(
                bot.send_message, chat_id=chat_id, text=summary_parts[0],
                parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True}
            )
            for part in summary_parts[1:]:
                await asyncio.sleep(0.5)  # small delay between chunks
                await retry_bot_operation(
                    bot.send_message, chat_id=chat_id, text=part,
                    parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True}
                )
            logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
        elif user_feedback_message:
            logger.warning(f"[Task {task_id}] Sending feedback/error message: {user_feedback_message}")
            await retry_bot_operation(
                bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True}
            )
        else:
            # Should be unreachable; report rather than stay silent.
            logger.error(f"[Task {task_id}] Reached end of task without success or specific error message.")
            await retry_bot_operation(
                bot.send_message, chat_id=chat_id, text="❓ Something went wrong, but no specific error was identified.",
                link_preview_options={'is_disabled': True}
            )

    except Exception as e:
        logger.critical(f"[Task {task_id}] Critical error within task processing: {e}", exc_info=True)
        try:
            if bot:
                await retry_bot_operation(
                    bot.send_message, chat_id=chat_id,
                    text="❌ A critical internal error occurred. Please report this if it persists."
                )
            else:
                logger.error("[Task ??] Cannot send critical error message: Bot instance not available.")
        except Exception:
            logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
    finally:
        # Best-effort cleanup: remove the "working..." status message.
        if status_message_id and bot:
            try:
                await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=status_message_id)
                logger.debug(f"[Task {task_id}] Deleted status message {status_message_id}")
            except Exception as e:
                logger.warning(f"[Task {task_id}] Failed to delete status message {status_message_id}: {e}")

        # Close this task's private HTTPX client to avoid leaking connections.
        # NOTE(review): reaches into HTTPXRequest's private `_client` — works
        # with the current python-telegram-bot version but is fragile.
        if background_request and hasattr(background_request, '_client') and background_request._client:
            try:
                await background_request._client.aclose()
                logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
            except Exception as e:
                logger.warning(f"[Task {task_id}] Error closing background bot's HTTPX client: {e}")
        else:
            logger.debug(f"[Task {task_id}] Background bot's HTTPX client already closed or not found.")

        logger.info(f"[Task {task_id}] Task completed. Success: {success}")
|
|
|
|
|
|
|
|
|
|
|
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles the /start command by greeting the user with usage hints."""
    user = update.effective_user
    if not user or not update.message:
        return
    logger.info(f"User {user.id} initiated /start.")
    greeting_parts = [
        f"👋 Hello {user.mention_html()}!\n\n",
        "I can summarise YouTube videos or web articles for you.\n\n",
        "Just send me a link (URL) and I'll ask you whether you want the summary as a paragraph or bullet points.\n\n",
        "Type /help for more details.",
    ]
    await update.message.reply_html("".join(greeting_parts))
|
|
|
|
|
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles the /help command by sending usage instructions."""
    user = update.effective_user
    if not user or not update.message:
        return
    logger.info(f"User {user.id} requested /help.")
    # Built from sections, then the active model name is interpolated.
    sections = [
        "**How to Use Me:**\n",
        "1. Send me a direct link (URL) to a YouTube video or a web article.\n",
        "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n",
        "3. Click the button for your preferred format.\n",
        "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n",
        "**Important Notes:**\n",
        "- **YouTube:** Transcript availability varies. I try multiple methods.\n",
        "- **Websites:** I attempt basic scraping and can use UrlToText API (if configured) for complex sites.\n",
        "- **AI Summaries:** Provided by OpenRouter (using model: `{model}`). Accuracy may vary.\n",
        "- **Length Limits:** Very long content might be truncated.\n\n",
        "Just send a link to get started!",
    ]
    help_text = "".join(sections).format(model=OPENROUTER_MODEL)
    await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
|
|
|
|
|
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles messages containing potential URLs.

    When a URL is found it is stashed in user_data and the user is shown
    format-selection buttons; otherwise link-like text gets a gentle hint.
    """
    if not update.message or not update.message.text:
        return
    message_text = update.message.text.strip()
    user = update.effective_user
    if not user:
        return

    match = re.search(r'https?://[^\s<>"]+|www\.[^\s<>"]+', message_text)
    if match:
        # Trim common trailing punctuation users attach to pasted links.
        url = re.sub(r'[.,!?)\]>]+$', '', match.group(0))
        logger.info(f"User {user.id} sent potential URL: {url}")

        # Remember the link so the button callback can retrieve it later.
        context.user_data['url_to_summarize'] = url
        context.user_data['original_message_id'] = update.message.message_id

        format_buttons = InlineKeyboardMarkup([[
            InlineKeyboardButton("📜 Paragraph", callback_data="paragraph"),
            InlineKeyboardButton("🔹 Bullet Points", callback_data="points"),
        ]])
        await update.message.reply_text(
            f"✅ Link received:\n`{url}`\n\nChoose your desired summary format:",
            reply_markup=format_buttons,
            parse_mode=ParseMode.MARKDOWN,
            link_preview_options={'is_disabled': True}
        )
        return

    if message_text.startswith('/'):
        return
    logger.debug(f"User {user.id} sent non-URL, non-command text: '{message_text[:50]}...'")
    # Looks vaguely link-like but didn't match the URL pattern: nudge the user.
    if "http" in message_text or "www." in message_text or ".com" in message_text or ".org" in message_text or ".net" in message_text:
        await update.message.reply_text("Hmm, that looks like it might be a link, but please ensure it starts with `http://` or `https://` and is a valid URL.")
|
|
|
|
|
|
|
|
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles button presses for summary type selection.

    Validates the callback, retrieves the URL stored by handle_potential_url,
    and schedules process_summary_task as a detached asyncio task so the
    webhook request can return immediately.
    """
    query = update.callback_query
    if not query or not query.message or not query.from_user:
        logger.warning("Callback query received without essential data.")
        if query: await query.answer()
        return

    user = query.from_user
    summary_type = query.data  # "paragraph" or "points" (set in handle_potential_url)
    query_id = query.id

    # Acknowledge promptly so Telegram stops showing the loading spinner.
    try:
        await query.answer()
        logger.debug(f"Acknowledged callback query {query_id} from user {user.id}")
    except Exception as e:
        logger.error(f"Error answering callback query {query_id} from user {user.id}: {e}", exc_info=True)

    url = context.user_data.get('url_to_summarize')
    message_id_to_edit = query.message.message_id

    logger.info(f"User {user.id} chose summary type '{summary_type}' for URL associated with message {message_id_to_edit}")

    # Stale button press (e.g. after a restart): the stored URL is gone.
    if not url:
        logger.warning(f"No URL found in user_data for user {user.id} (callback query {query_id}). Editing message.")
        try:
            await query.edit_message_text(text="⚠️ Oops! I couldn't find the link associated with this request. Please send the link again.")
        except Exception as e:
            logger.error(f"Failed to edit message to show 'URL not found' error: {e}")
        return

    # Consume the stored state so repeated presses can't reuse it.
    context.user_data.pop('url_to_summarize', None)
    context.user_data.pop('original_message_id', None)

    if not TELEGRAM_TOKEN:
        logger.critical("TELEGRAM_TOKEN is missing, cannot start background task!")
        try:
            await query.edit_message_text(text="❌ Internal configuration error. Cannot process request.")
        except Exception: pass
        return

    logger.info(f"Scheduling background task for user {user.id}, chat {query.message.chat_id}, message {message_id_to_edit}, type {summary_type}")
    # Fire-and-forget: the heavy lifting happens off the webhook request path.
    asyncio.create_task(
        process_summary_task(
            user_id=user.id,
            chat_id=query.message.chat_id,
            message_id_to_edit=message_id_to_edit,
            url=url,
            summary_type=summary_type,
            bot_token=TELEGRAM_TOKEN
        ),
        name=f"SummaryTask-{user.id}-{message_id_to_edit}"
    )
|
|
|
|
|
|
|
|
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Top-level PTB error handler: log exceptions raised while handling updates."""
    err = context.error

    # A known, harmless AttributeError surfaces during bot cleanup; suppress
    # it at debug level rather than spamming the error log with a traceback.
    known_cleanup_noise = (
        isinstance(err, AttributeError)
        and "'Bot' object has no attribute 'session'" in str(err)
    )
    if known_cleanup_noise:
        logger.debug(f"Ignoring known cleanup error in error_handler: {err}")
        return

    logger.error("Exception while handling an update:", exc_info=err)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def setup_bot_config() -> Application:
    """Build and return the fully configured PTB Application.

    Returns:
        The built ``Application`` with all command/message/callback handlers
        and the error handler registered.

    Raises:
        ValueError: if the TELEGRAM_TOKEN environment variable is unset.
    """
    logger.info("Configuring Telegram Application...")
    if not TELEGRAM_TOKEN:
        raise ValueError("TELEGRAM_TOKEN environment variable not set.")

    # Custom HTTP client: generous pool timeout, HTTP/1.1 for compatibility.
    request = HTTPXRequest(
        connect_timeout=10.0,
        read_timeout=30.0,
        write_timeout=30.0,
        pool_timeout=60.0,
        http_version="1.1",
    )

    application = Application.builder().token(TELEGRAM_TOKEN).request(request).build()

    # Register handlers: explicit commands first, then the generic text
    # handler for candidate URLs, then inline-keyboard callback queries.
    for handler in (
        CommandHandler("start", start),
        CommandHandler("help", help_command),
        MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url),
        CallbackQueryHandler(handle_summary_type_callback),
    ):
        application.add_handler(handler)
    application.add_error_handler(error_handler)

    logger.info("Telegram application handlers configured.")
    return application
|
|
|
|
|
|
|
|
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
    """Handles PTB startup and shutdown during ASGI lifespan.

    Startup: builds and initializes the PTB Application, deletes any stale
    Telegram webhook, registers a fresh webhook pointing at this deployment
    (derived from SPACE_HOST), then starts update processing. Shutdown (the
    ``finally`` block): stops and shuts down the PTB Application.

    Raises:
        RuntimeError: if the token or SPACE_HOST is missing, or webhook
            registration fails/mismatches.
    """
    global ptb_app, WEBHOOK_SECRET
    logger.info("ASGI Lifespan: Startup sequence initiated...")

    # Fail fast: nothing below can work without a bot token.
    if not TELEGRAM_TOKEN:
        logger.critical("TELEGRAM_TOKEN is not set. Bot cannot start.")
        raise RuntimeError("Telegram token missing.")

    bot_info_text = "Bot info not available yet."
    try:
        ptb_app = await setup_bot_config()
        await ptb_app.initialize()
        # get_me() doubles as a token/connectivity sanity check.
        bot_info = await ptb_app.bot.get_me()
        bot_info_text = f"@{bot_info.username} (ID: {bot_info.id})"
        logger.info(f"Bot initialized: {bot_info_text}")

        # Remove any webhook left over from a previous deployment so the
        # registration below starts from a clean slate.
        current_webhook_info = await ptb_app.bot.get_webhook_info()
        if current_webhook_info and current_webhook_info.url:
            logger.info(f"Found existing webhook: {current_webhook_info.url}. Attempting to delete it.")
            try:
                # drop_pending_updates: discard updates queued for the old URL.
                if await ptb_app.bot.delete_webhook(drop_pending_updates=True):
                    logger.info("Existing webhook deleted successfully.")
                else:
                    logger.warning("Failed to delete existing webhook (API returned False).")
            except Exception as e:
                # Best effort: a failed delete is not fatal, set_webhook below
                # will overwrite the registration anyway.
                logger.warning(f"Could not delete existing webhook: {e}")
            # Brief pause between delete and set — presumably to let the
            # Telegram side settle; TODO confirm whether this is required.
            await asyncio.sleep(1)

        # Derive the public webhook URL from the hosting platform's
        # SPACE_HOST env var (Hugging Face Spaces convention).
        space_host = os.environ.get("SPACE_HOST")
        webhook_path = "/webhook"
        full_webhook_url = None
        if space_host:
            protocol = "https://"
            # SPACE_HOST may or may not include a scheme; strip it if present.
            host = space_host.split('://')[-1]
            full_webhook_url = f"{protocol}{host.rstrip('/')}{webhook_path}"

            if full_webhook_url:
                logger.info(f"Attempting to set webhook to: {full_webhook_url}")

                set_webhook_args = {
                    "url": full_webhook_url,
                    "allowed_updates": Update.ALL_TYPES,
                    "drop_pending_updates": True,
                }
                # Optional shared secret: Telegram echoes it back in the
                # X-Telegram-Bot-Api-Secret-Token header of each webhook call.
                if WEBHOOK_SECRET:
                    set_webhook_args["secret_token"] = WEBHOOK_SECRET
                    logger.info("Webhook will be set with a secret token.")

                await asyncio.sleep(1.0)
                try:
                    await ptb_app.bot.set_webhook(**set_webhook_args)
                    # Read back the registration to verify it actually stuck.
                    webhook_info = await ptb_app.bot.get_webhook_info()

                    if webhook_info.url == full_webhook_url:
                        logger.info(f"Webhook successfully set: URL='{webhook_info.url}', Pending={webhook_info.pending_update_count}, Secret={bool(WEBHOOK_SECRET)}")
                    else:
                        logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got '{webhook_info.url}'")
                        raise RuntimeError("Webhook URL mismatch after setting.")

                    # Start PTB's internal machinery; updates arrive via the
                    # /webhook route, not via polling.
                    await ptb_app.start()
                    logger.info("PTB Application started (webhook mode). Ready for updates.")
                except Exception as e:
                    logger.error(f"FATAL: Failed to set webhook to {full_webhook_url}: {e}", exc_info=True)
                    raise RuntimeError(f"Failed to set webhook: {e}") from e
            else:
                # NOTE(review): unreachable in practice — full_webhook_url is
                # always truthy when space_host is set; kept for safety.
                logger.critical("Could not construct valid HTTPS webhook URL from SPACE_HOST.")
                raise RuntimeError("Webhook URL could not be determined.")
        else:
            logger.critical("SPACE_HOST environment variable not found. Cannot set webhook for HF Space.")
            raise RuntimeError("SPACE_HOST env var missing, cannot run in webhook mode.")

        logger.info("ASGI Lifespan: Startup complete.")
        # Hand control to the ASGI server; execution resumes on shutdown.
        yield

    except Exception as startup_err:
        logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
        # NOTE(review): on startup failure, stop()/shutdown() run here AND
        # again in the `finally` block below — confirm PTB tolerates a second
        # shutdown() call on an already-shut-down Application.
        if ptb_app:
            if ptb_app.running: await ptb_app.stop()
            await ptb_app.shutdown()
        raise
    finally:
        # Runs on both normal shutdown and startup failure.
        logger.info("ASGI Lifespan: Shutdown sequence initiated...")
        if ptb_app:
            if ptb_app.running:
                logger.info("Stopping PTB application...")
                await ptb_app.stop()
            logger.info("Shutting down PTB application...")
            await ptb_app.shutdown()
            logger.info("PTB Application shut down gracefully.")
        else:
            logger.info("PTB application was not initialized or startup failed.")
        logger.info("ASGI Lifespan: Shutdown complete.")
|
|
|
|
|
|
|
|
|
|
|
async def health_check(request: Request) -> PlainTextResponse:
    """Report bot liveness plus the configured model/actor as plain text."""
    status = "Not Initialized"
    if ptb_app and ptb_app.bot:
        try:
            if not ptb_app.running:
                status = "Initialized but not running"
            else:
                # Live round-trip to Telegram confirms the bot is reachable.
                me = await ptb_app.bot.get_me()
                status = f"Running (@{me.username})"
        except Exception as e:
            status = f"Error checking status: {e}"

    actor = APIFY_ACTOR_NAME if _apify_token_exists else 'N/A'
    body = f"Telegram Bot Summarizer - Status: {status}\nModel: {OPENROUTER_MODEL}\nApify Actor: {actor}"
    return PlainTextResponse(body)
|
|
|
|
|
|
|
|
async def telegram_webhook(request: Request) -> Response:
    """Receive a Telegram update via webhook and hand it to PTB."""
    global WEBHOOK_SECRET

    # Guard clauses: refuse traffic until the bot is fully up.
    if not ptb_app:
        logger.error("Webhook received but PTB application not initialized.")
        return PlainTextResponse('Bot not initialized', status_code=503)
    if not ptb_app.running:
        logger.warning("Webhook received but PTB application not running.")
        return PlainTextResponse('Bot initialized but not running', status_code=503)

    try:
        # Verify Telegram's shared secret when one was configured at startup.
        if WEBHOOK_SECRET:
            provided = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
            if provided != WEBHOOK_SECRET:
                logger.warning(f"Webhook received with invalid secret token. Header: '{provided}'")
                return Response(content="Invalid secret token", status_code=403)

        payload = await request.json()
        update = Update.de_json(data=payload, bot=ptb_app.bot)
        logger.debug(f"Processing update_id: {update.update_id} via webhook")
        await ptb_app.process_update(update)
        return Response(status_code=200)
    except json.JSONDecodeError:
        logger.error("Webhook received invalid JSON.")
        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
    except Exception as e:
        # Still acknowledge with 200 so Telegram does not keep redelivering
        # an update we will never be able to process.
        logger.error(f"Error processing webhook update: {e}", exc_info=True)
        return Response(status_code=200)
|
|
|
|
|
|
|
|
# Wire the ASGI application: "/" serves the health probe, "/webhook" receives
# Telegram updates; `lifespan` starts/stops the PTB application with the server.
app = Starlette(
    debug=False,
    lifespan=lifespan,
    routes=[
        Route("/", endpoint=health_check, methods=["GET"]),
        Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
    ]
)
logger.info("Starlette ASGI application created with native routes.")
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Local development entry point only; in production an external ASGI
    # server should import `app` from this module instead.
    import uvicorn
    logger.warning("Running in development mode using Uvicorn directly (not for production)")

    # Both knobs are overridable via the environment for local testing.
    log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
    local_port = int(os.environ.get('PORT', 8080))

    # NOTE(review): `reload=True` combined with the "__main__:app" import
    # string can misbehave — uvicorn's reloader re-imports the target in a
    # child process, where "__main__" may not resolve to this file. Confirm,
    # or use the real module name (e.g. "mymodule:app") here.
    uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)