fmab777 committed
Commit 7c987d0 · verified
1 Parent(s): d742ff9

Update main.py

Files changed (1)
  1. main.py +117 -308
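For context while reading the diff: both the removed and the added code call Apify's run-sync-get-dataset-items endpoint directly with httpx instead of the apify-client SDK. Below is a minimal, self-contained sketch of that call; the endpoint, params, payload keys and result parsing are taken from main.py itself, while the function name `fetch_transcript_sketch` and the bare error handling are illustrative only and not part of the file.

    from typing import Optional
    import httpx

    APIFY_ACTOR_ID = "karamelo/youtube-transcripts"  # default actor configured in main.py

    async def fetch_transcript_sketch(video_url: str, api_token: str) -> Optional[str]:
        # Run the actor synchronously and read its dataset items in a single request.
        api_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
        payload = {
            "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5,
            "channelHandleBoolean": False, "channelNameBoolean": False,
            "datePublishedBoolean": False, "relativeDateTextBoolean": False,
        }
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(api_endpoint, params={"token": api_token},
                                         headers={"Content-Type": "application/json"}, json=payload)
            response.raise_for_status()
            results = response.json()
        if isinstance(results, list) and results:
            item = results[0]
            # The actor may expose the transcript under different keys.
            return item.get("text") or item.get("transcript") or item.get("captions_concatenated")
        return None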
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Refactored to match Colab logic using httpx and ASGI structure)
2
  import os
3
  import re
4
  import logging
@@ -30,7 +30,7 @@ from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, Teleg
30
  from telegram.request import HTTPXRequest, BaseRequest
31
 
32
  # --- Other Libraries ---
33
- import httpx # Use httpx for all async HTTP calls
34
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
35
  from bs4 import BeautifulSoup
36
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
@@ -40,15 +40,12 @@ try:
40
  except ImportError:
41
  DEFAULT_PARSER = 'html.parser'
42
 
43
- # NOTE: apify-client is NOT used, as we replicate the REST API call from Colab
44
-
45
  # --- Logging Setup ---
46
  logging.basicConfig(
47
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
48
  level=logging.INFO
49
  )
50
  logging.getLogger("httpx").setLevel(logging.WARNING)
51
- # No apify_client logger needed
52
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
53
  logging.getLogger('telegram.bot').setLevel(logging.INFO)
54
  logging.getLogger("urllib3").setLevel(logging.INFO)
@@ -72,36 +69,28 @@ def get_secret(secret_name):
72
 
73
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
74
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
75
- URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # For urltotext.com API
76
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
77
- APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # For Apify REST API call
78
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
79
 
80
- # Configuration matching Colab script
81
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
82
- APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo/youtube-transcripts") # Actor used in Colab
83
-
84
- # Check Essential Keys
85
- if not TELEGRAM_TOKEN:
86
- logger.critical("❌ FATAL: TELEGRAM_TOKEN not found in environment variables.")
87
- raise RuntimeError("Exiting: Telegram token missing.")
88
- if not OPENROUTER_API_KEY:
89
- logger.error(" ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
90
- # Allow running without summary capability? For now, we'll let it run but log error.
91
- # raise RuntimeError("Exiting: OpenRouter key missing.")
92
-
93
- # Log warnings for optional keys (used in fallbacks)
94
- if not URLTOTEXT_API_KEY: logger.warning("⚠️ WARNING: URLTOTEXT_API_KEY not found. Fallback website scraping unavailable.")
95
- if not SUPADATA_API_KEY: logger.warning("⚠️ WARNING: SUPADATA_API_KEY not found. First YT transcript fallback unavailable.")
96
- if not APIFY_API_TOKEN: logger.warning("⚠️ WARNING: APIFY_API_TOKEN not found. Second YT transcript fallback unavailable.")
97
 
98
  logger.info("Secret loading and configuration check finished.")
99
  logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
100
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
101
 
102
- _apify_token_exists = bool(APIFY_API_TOKEN) # Flag for conditional logic
103
 
104
- # --- Retry Decorator (Unchanged) ---
105
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15),
106
  retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
107
  before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
@@ -114,7 +103,7 @@ async def retry_bot_operation(func, *args, **kwargs):
114
  except TelegramError as e: logger.warning(f"TelegramError (will retry if applicable): {e}"); raise
115
  except Exception as e: logger.error(f"Unexpected error during bot operation: {e}", exc_info=True); raise
116
 
117
- # --- Helper Functions (Unchanged) ---
118
  def is_youtube_url(url):
119
  youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
120
  match = youtube_regex.search(url); logger.debug(f"is_youtube_url '{url}': {bool(match)}"); return bool(match)
@@ -124,16 +113,9 @@ def extract_youtube_id(url):
124
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
125
  else: logger.warning(f"Could not extract YT ID from {url}"); return None
126
 
127
- # --- Content Fetching Functions (Reimplemented based on Colab logic using httpx) ---
128
-
129
- # Generic fetcher used by website scraping (similar to previous version)
130
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
131
- """Fetches HTML content from a URL using httpx for scraping."""
132
- headers = { # Headers from Colab script
133
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
134
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
135
- 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1'
136
- }
137
  try:
138
  async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
139
  logger.debug(f"[Web Scrape] Sending request to {url}")
@@ -141,106 +123,81 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[
141
  logger.debug(f"[Web Scrape] Received response {response.status_code} from {url}")
142
  response.raise_for_status()
143
  content_type = response.headers.get('content-type', '').lower()
144
- if 'html' not in content_type:
145
- logger.warning(f"[Web Scrape] Non-HTML content type from {url}: {content_type}")
146
- return None
147
- try: return response.text # Let httpx handle encoding
148
  except Exception as e: logger.error(f"[Web Scrape] Error decoding response for {url}: {e}"); return None
149
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape] HTTP error {e.response.status_code} fetching {url}: {e}")
150
  except httpx.TimeoutException: logger.error(f"[Web Scrape] Timeout error fetching {url}")
151
  except httpx.TooManyRedirects: logger.error(f"[Web Scrape] Too many redirects fetching {url}")
152
- except httpx.RequestError as e: logger.error(f"[Web Scrape] Request error fetching {url}: {e}") # Covers ConnectError etc.
153
  except Exception as e: logger.error(f"[Web Scrape] Unexpected error fetching {url}: {e}", exc_info=True)
154
  return None
155
 
156
- # --- YT Transcript Fetching ---
157
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
158
- """Fetches YouTube transcript using Supadata API (matching Colab endpoint)."""
159
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
160
  if not api_key: logger.error("[Supadata] API key missing."); return None
161
  logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
162
- # Colab script uses /v1/youtube/transcript
163
  api_endpoint = "https://api.supadata.net/v1/youtube/transcript"
164
- params = {"videoId": video_id, "format": "text"} # Params from Colab script
165
- headers = {"X-API-Key": api_key}
166
  try:
167
  async with httpx.AsyncClient(timeout=30.0) as client:
168
  response = await client.get(api_endpoint, headers=headers, params=params)
169
  logger.debug(f"[Supadata] Status code {response.status_code} for {video_id}")
170
  if response.status_code == 200:
171
  try:
172
- # Try JSON first, then plain text as fallback (Colab logic)
173
  try: data = response.json()
174
  except json.JSONDecodeError: data = None
175
  content = None
176
  if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
177
- if not content and response.text: content = response.text # Plain text fallback
178
- if content and isinstance(content, str):
179
- logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}")
180
- return content.strip()
181
  else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
182
  except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
183
- elif response.status_code in [401, 403]: logger.error(f"[Supadata] Auth error ({response.status_code}). Check API key."); return None # Don't retry
184
  elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None
185
  else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
186
  except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
187
- except httpx.RequestError as e: logger.error(f"[Supadata] Request error for {video_id}: {e}"); return None # Includes ConnectError, SSL problems etc.
188
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
189
 
190
  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
191
- """Fetches YouTube transcript using Apify REST API (matching Colab actor/endpoint)."""
192
- global APIFY_ACTOR_ID # Use globally configured actor
193
  if not video_url: logger.error("[Apify] No video_url provided"); return None
194
  if not api_token: logger.error("[Apify] API token missing."); return None
195
  logger.info(f"[Apify] Attempting fetch via REST for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
196
  api_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
197
  params = {"token": api_token}
198
- # Payload from Colab script
199
- payload = {
200
- "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5,
201
- "channelHandleBoolean": False, "channelNameBoolean": False,
202
- "datePublishedBoolean": False, "relativeDateTextBoolean": False,
203
- # Add proxy config here if needed and configured via env vars
204
- # "proxyOptions": { "useApifyProxy": True, "apifyProxyGroups": ["YOUR_PROXY_GROUP_IF_ANY"] },
205
- }
206
  headers = {"Content-Type": "application/json"}
207
  try:
208
- async with httpx.AsyncClient(timeout=90.0) as client: # Longer timeout for sync run
209
  logger.debug(f"[Apify] Sending request to run actor {APIFY_ACTOR_ID} synchronously for {video_url}")
210
- response = await client.post(api_endpoint, headers=headers, params=params, json=payload) # Use json=payload with httpx
211
  logger.debug(f"[Apify] Received status code {response.status_code} for {video_url}")
212
  if response.status_code == 200:
213
  try:
214
  results = response.json()
215
  if isinstance(results, list) and len(results) > 0:
216
  item = results[0]
217
- # Parsing logic from Colab script
218
  content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
219
- if not content and item.get("captions") and isinstance(item["captions"], list):
220
- logger.info("[Apify] Processing 'captions' format.")
221
- content = " ".join(cap.get("text", "") for cap in item["captions"] if cap.get("text"))
222
- if content and isinstance(content, str):
223
- logger.info(f"[Apify] Success via REST for {video_url}. Length: {len(content)}")
224
- return content.strip()
225
  else: logger.warning(f"[Apify] Actor success but transcript empty/not found for {video_url}. Item: {item}"); return None
226
  else: logger.warning(f"[Apify] Actor success but dataset empty for {video_url}. Response: {results}"); return None
227
  except json.JSONDecodeError: logger.error(f"[Apify] Failed JSON decode for {video_url}. Status:{response.status_code}. Resp:{response.text[:200]}"); return None
228
  except Exception as e: logger.error(f"[Apify] Error processing success response for {video_url}: {e}", exc_info=True); return None
229
  elif response.status_code == 400: logger.error(f"[Apify] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
230
- elif response.status_code == 401: logger.error("[Apify] Auth error (401). Check token."); return None # Don't retry
231
  else: logger.error(f"[Apify] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
232
  except httpx.TimeoutException: logger.error(f"[Apify] Timeout running actor for {video_url}"); return None
233
  except httpx.RequestError as e: logger.error(f"[Apify] Request error running actor for {video_url}: {e}"); return None
234
  except Exception as e: logger.error(f"[Apify] Unexpected error during Apify REST call for {video_url}: {e}", exc_info=True); return None
235
 
236
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
237
- """Fetches YT transcript using primary (lib) + fallbacks (Supadata, Apify REST)."""
238
- global SUPADATA_API_KEY, APIFY_API_TOKEN # Access globally loaded keys
239
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
240
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
241
  transcript_text = None
242
-
243
- # 1. Primary: youtube-transcript-api
244
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
245
  try:
246
  transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] )
@@ -252,8 +209,6 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
252
  if "No transcript found" in str(e): logger.warning(f"[Primary YT] No transcript found.")
253
  elif "disabled" in str(e): logger.warning(f"[Primary YT] Transcripts disabled.")
254
  transcript_text = None
255
-
256
- # 2. Fallback 1: Supadata API
257
  if transcript_text is None:
258
  logger.info("[Fallback YT 1] Trying Supadata API...")
259
  if SUPADATA_API_KEY:
@@ -261,8 +216,6 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
261
  if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
262
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
263
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
264
-
265
- # 3. Fallback 2: Apify REST API
266
  if transcript_text is None:
267
  logger.info("[Fallback YT 2] Trying Apify REST API...")
268
  if APIFY_API_TOKEN:
@@ -270,25 +223,18 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
270
  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify REST for {video_url}"); return transcript_text
271
  else: logger.warning(f"[Fallback YT 2] Apify REST failed or no content for {video_url}.")
272
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
273
-
274
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
275
- return transcript_text # Should be None if all failed
276
 
277
- # --- Website Content Fetching ---
278
  async def get_website_content(url: str) -> Optional[str]:
279
- """Primary: Scrapes website using httpx + BeautifulSoup (logic from Colab)."""
280
  if not url: logger.error("get_website_content: No URL"); return None
281
  logger.info(f"[Primary Web] Fetching website content for: {url}")
282
  html_content = await fetch_url_content_for_scrape(url)
283
  if not html_content: return None
284
  try:
285
  def parse_html(content):
286
- # Use lxml if available, otherwise html.parser
287
  soup = BeautifulSoup(content, DEFAULT_PARSER)
288
- # Removal logic from Colab script
289
- for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]):
290
- element.extract()
291
- # Content finding logic from Colab script
292
  main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
293
  target_element = main_content if main_content else soup.body
294
  if not target_element: logger.warning(f"[Primary Web] Could not find body/main for parsing {url}"); return None
@@ -296,94 +242,52 @@ async def get_website_content(url: str) -> Optional[str]:
296
  text = " ".join(lines)
297
  if not text: logger.warning(f"[Primary Web] Extracted text empty after clean for {url}"); return None
298
  return text
299
-
300
  text_content = await asyncio.to_thread(parse_html, html_content)
301
  if text_content: logger.info(f"[Primary Web] Success scrape for {url} (final len: {len(text_content)})"); return text_content
302
  else: return None
303
  except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
304
 
305
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
306
- """Fallback: Fetches website content using urltotext.com API (Colab endpoint)."""
307
  if not url: logger.error("[Fallback Web API] No URL"); return None
308
  if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
309
  logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
310
- # Endpoint and payload from Colab script
311
  api_endpoint = "https://urltotext.com/api/v1/urltotext/"
312
  payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
313
- headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" } # Note: Token auth
314
  try:
315
- async with httpx.AsyncClient(timeout=45.0) as client: # Longer timeout for JS render
316
  logger.debug(f"[Fallback Web API] Sending request to urltotext.com API for {url}")
317
  response = await client.post(api_endpoint, headers=headers, json=payload)
318
  logger.debug(f"[Fallback Web API] Received status {response.status_code} from urltotext.com API for {url}")
319
  if response.status_code == 200:
320
  try:
321
  data = response.json()
322
- content = data.get("data", {}).get("content")
323
- credits = data.get("credits_used", "N/A")
324
- warning = data.get("data", {}).get("warning")
325
  if warning: logger.warning(f"[Fallback Web API] urltotext.com API Warning for {url}: {warning}")
326
  if content: logger.info(f"[Fallback Web API] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}"); return content.strip()
327
  else: logger.warning(f"[Fallback Web API] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
328
  except json.JSONDecodeError: logger.error(f"[Fallback Web API] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
329
  except Exception as e: logger.error(f"[Fallback Web API] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
330
- # Error codes from Colab script check
331
  elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[Fallback Web API] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
332
  else: logger.error(f"[Fallback Web API] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
333
  except httpx.TimeoutException: logger.error(f"[Fallback Web API] Timeout connecting to urltotext.com API for {url}"); return None
334
  except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
335
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
336
 
337
- # --- Summarization Function (Using DeepSeek via OpenRouter - Colab logic) ---
338
  async def generate_summary(text: str, summary_type: str) -> str:
339
- """Generates summary using DeepSeek via OpenRouter API (Colab prompts/model)."""
340
- global OPENROUTER_API_KEY, OPENROUTER_MODEL # Use globally loaded config
341
  logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
342
  if not OPENROUTER_API_KEY: logger.error("OpenRouter key missing for generate_summary."); return "Error: AI model configuration key missing."
343
-
344
- # Prompts from Colab script
345
- if summary_type == "paragraph":
346
- prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
347
- "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
348
- "• Uses British English spellings throughout.\n"
349
- "• Straightforward and understandable vocabulary; avoid complex terms.\n"
350
- "• Presented as ONE SINGLE PARAGRAPH.\n"
351
- "• No more than 85 words maximum; but does not have to be exactly 85.\n"
352
- "• Considers the entire text content equally.\n"
353
- "• Uses semicolons (;) instead of em dashes (– or —).\n\n"
354
- "Here is the text to summarise:")
355
- else: # points
356
- prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n"
357
- "• For each distinct topic or section identified in the text, create a heading.\n"
358
- "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n"
359
- "• Immediately following each heading, list the key points as a bulleted list.\n"
360
- "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n"
361
- "• The text within each bullet point should NOT contain any bold formatting.\n"
362
- "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n"
363
- "• Use British English spellings throughout.\n"
364
- "• Avoid overly complex or advanced vocabulary.\n"
365
- "• Keep bullet points concise.\n"
366
- "• Ensure the entire summary takes no more than two minutes to read.\n"
367
- "• Consider the entire text's content, not just the beginning or a few topics.\n"
368
- "• Use semicolons (;) instead of em dashes (– or —).\n\n"
369
- "Here is the text to summarise:")
370
-
371
- # Limit input length (Colab script used 500k, adjust if needed)
372
  MAX_INPUT_LENGTH = 500000
373
- if len(text) > MAX_INPUT_LENGTH:
374
- logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating.")
375
- text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
376
  full_prompt = f"{prompt}\n\n{text}"
377
-
378
- headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }
379
- payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }
380
- openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
381
-
382
  try:
383
- async with httpx.AsyncClient(timeout=60.0) as client: # Timeout from Colab
384
- logger.debug(f"Sending request to OpenRouter ({OPENROUTER_MODEL})...")
385
- response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
386
- logger.debug(f"Received status {response.status_code} from OpenRouter.")
387
  if response.status_code == 200:
388
  try:
389
  data = response.json()
@@ -397,7 +301,6 @@ async def generate_summary(text: str, summary_type: str) -> str:
397
  else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
398
  except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
399
  except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
400
- # Error handling matching Colab script
401
  elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
402
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
403
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
@@ -407,122 +310,78 @@ async def generate_summary(text: str, summary_type: str) -> str:
407
  except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, error connecting to AI service."
408
  except Exception as e: logger.error(f"Unexpected error in generate_summary (OpenRouter): {e}", exc_info=True); return "Sorry, unexpected error generating summary."
409
 
410
- # --- Background Task Processing (Orchestrates new fetch/summary functions) ---
411
  async def process_summary_task(
412
  user_id: int, chat_id: int, message_id_to_edit: Optional[int],
413
- url: str, summary_type: str, bot_token: str # bot_token needed to create instance
414
  ) -> None:
415
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"
416
  logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
417
- background_request: Optional[BaseRequest] = None
418
- bot: Optional[Bot] = None
419
- try: # Create background bot instance
420
- background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 )
421
- bot = Bot(token=bot_token, request=background_request)
422
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
423
-
424
  content = None; user_feedback_message = None; success = False
425
- status_message_id = message_id_to_edit
426
- message_to_delete_later_id : Optional[int] = None # Track ID of new status message
427
-
428
  try:
429
- # --- Inform User Processing Started ---
430
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
431
  if status_message_id:
432
- try:
433
- await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id,
434
- text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None )
435
- logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
436
- except Exception as e:
437
- logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Sending new.")
438
- status_message_id = None # Will trigger sending new message
439
- if not status_message_id: # Send new status message if needed
440
  try:
441
  status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN )
442
  if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
443
  else: raise RuntimeError("Failed to send status message after retries.")
444
- except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise # Stop if we can't inform user
445
-
446
- # --- Main Fetching & Summarization ---
447
  try:
448
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
449
- is_youtube = is_youtube_url(url)
450
- logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
451
-
452
  if is_youtube:
453
  video_id = extract_youtube_id(url)
454
- if video_id: content = await get_youtube_transcript(video_id, url) # Calls new func with fallbacks
455
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
456
- if not content and not user_feedback_message: # Set default fail message if get_youtube_transcript returned None
457
- user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
458
- else: # Website
459
- content = await get_website_content(url) # Calls new primary func
460
- if not content: # Try fallback
461
  logger.warning(f"[Task {task_id}] Primary web scrape failed for {url}. Trying fallback API.")
462
- global URLTOTEXT_API_KEY # Access key
463
  if URLTOTEXT_API_KEY:
464
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
465
- content = await get_website_content_via_api(url, URLTOTEXT_API_KEY) # Calls new fallback func
466
  if not content: user_feedback_message = "Sorry, I couldn't fetch content from that website using either method (blocked/inaccessible/empty?)."
467
- else:
468
- user_feedback_message = "Sorry, I couldn't fetch content from that website (blocked/inaccessible/empty?). The fallback method is not configured."
469
-
470
- # --- Generate Summary ---
471
  if content:
472
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
473
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
474
- # Use new generate_summary function (keys accessed globally within it)
475
  final_summary = await generate_summary(content, summary_type)
476
- if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
477
- user_feedback_message = final_summary # Use error from summary func
478
- logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
479
  else:
480
- # Success! Send summary (split if needed)
481
- max_length = 4096
482
- summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
483
- # Send first/only part
484
- await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0],
485
- parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
486
- # Send subsequent parts
487
  for part in summary_parts[1:]: await asyncio.sleep(0.5); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
488
- success = True; logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
489
- user_feedback_message = None # Clear any previous error
490
-
491
- # --- Send Feedback if Fetching or Summary Failed ---
492
- elif user_feedback_message: # Only send if content failed AND message exists
493
- logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}")
494
- await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
495
-
496
  except Exception as e:
497
- logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
498
- user_feedback_message = "Oops! Something went really wrong while processing your request. Please try again later."
499
- # Ensure we send this feedback if an unexpected exception occurs
500
  try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
501
  except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
502
-
503
- except Exception as outer_e: # Catch critical errors like failing to send status message
504
  logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
505
  try:
506
  if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred." )
507
  except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
508
-
509
  finally:
510
- # --- Cleanup ---
511
  delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
512
  if delete_target_id and bot:
513
- try:
514
- # Delete the original button message OR the status message we sent
515
- await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id)
516
- logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
517
  except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
518
- # Close background bot's httpx client
519
  if background_request and hasattr(background_request, '_client') and background_request._client:
520
  try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
521
  except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
522
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
523
 
524
-
525
- # --- Telegram Bot Handlers (Unchanged structure, Colab text/logic adjusted) ---
526
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
527
  user = update.effective_user; mention = user.mention_html()
528
  if not user or not update.message: return
@@ -533,105 +392,72 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> No
533
  user = update.effective_user
534
  if not user or not update.message: return
535
  logger.info(f"User {user.id} used /help.")
536
- # Help text from Colab script
537
- help_text = ( "🔍 How to use this bot:\n\n"
538
- "1. Send me any YouTube video link or website URL.\n"
539
- "2. I'll ask you how you want it summarized (paragraph or points).\n"
540
- "3. Click the button for your choice.\n"
541
- "4. Wait for the summary!\n\n"
542
- "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n"
543
- "Commands:\n"
544
- "`/start` - Display welcome message\n"
545
- "`/help` - Show this help message" )
546
  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
547
 
548
  async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
549
  if not update.message or not update.message.text: return
550
  url = update.message.text.strip(); user = update.effective_user
551
  if not user: return
552
- # Basic validation from Colab script
553
- if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
554
- logger.debug(f"Ignoring non-URL from {user.id}: {url}"); return
555
  logger.info(f"User {user.id} sent potential URL: {url}")
556
- context.user_data['url_to_summarize'] = url
557
- context.user_data['original_message_id'] = update.message.message_id # Still useful potentially
558
- # Keyboard text from Colab script
559
  keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
560
  reply_markup = InlineKeyboardMarkup(keyboard)
561
- # Reply text from Colab script
562
- await update.message.reply_text( f"Okay, I see this link:\n{url}\n\nHow would you like it summarized?",
563
- reply_markup=reply_markup, disable_web_page_preview=True )
564
 
565
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
566
- """Handles button press, retrieves URL, and schedules background task."""
567
  query = update.callback_query
568
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
569
  user = query.from_user; summary_type = query.data; query_id = query.id
570
  try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
571
  except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
572
-
573
- url = context.user_data.get('url_to_summarize')
574
- message_id_to_edit = query.message.message_id # Use the message with the buttons
575
  logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
576
-
577
  if not url:
578
  logger.warning(f"No URL in context for user {user.id} (cb {query_id}).")
579
- try: await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
580
- except Exception as e: logger.error(f"Failed edit 'URL not found' msg: {e}"); try: await context.bot.send_message(chat_id=user.id, text="Sorry, context lost. Send link again.") except Exception: pass
581
  return
582
 
583
- # Clear context *before* starting task to prevent race conditions if user clicks fast
584
- context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None)
585
- logger.debug(f"Cleared URL context for user {user.id}")
586
-
587
- # Check essential keys needed for the task *before* scheduling
588
  global TELEGRAM_TOKEN, OPENROUTER_API_KEY
589
  if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing!"); try: await query.edit_message_text(text="❌ Bot config error.") except Exception: pass; return
590
  if not OPENROUTER_API_KEY: logger.error("OpenRouter key missing!"); try: await query.edit_message_text(text="❌ AI config error.") except Exception: pass; return
591
-
592
  logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
593
- # Pass the bot token to the background task so it can create its own instance
594
- asyncio.create_task( process_summary_task( user_id=user.id, chat_id=query.message.chat_id, message_id_to_edit=message_id_to_edit,
595
- url=url, summary_type=summary_type, bot_token=TELEGRAM_TOKEN ), name=f"SummaryTask-{user.id}-{message_id_to_edit}" )
596
 
597
- # --- Error Handler, Bot Setup, Lifespan, Routes (Largely Unchanged, Ensure Keys Read) ---
598
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
599
- """Log Errors caused by Updates."""
600
- # Add specific error types to ignore if they are handled elsewhere or benign
601
- ignore_errors = (AttributeError, ) # Example: Ignore cleanup errors if handled in finally blocks
602
- if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): # Be more specific
603
- logger.debug(f"Ignoring known/handled error in error_handler: {context.error}")
604
- return
605
  logger.error("Exception while handling an update:", exc_info=context.error)
606
 
 
607
  async def setup_bot_config() -> Application:
608
- """Configures the PTB Application."""
609
- logger.info("Configuring Telegram Application...")
610
- global TELEGRAM_TOKEN # Ensure global token is accessible
611
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
612
  custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
613
  application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
614
- application.add_handler(CommandHandler("start", start))
615
- application.add_handler(CommandHandler("help", help_command))
616
- application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
617
- application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
618
- application.add_error_handler(error_handler)
619
- logger.info("Telegram application handlers configured.")
620
- return application
621
 
 
622
  @contextlib.asynccontextmanager
623
  async def lifespan(app: Starlette):
624
- """Handles PTB startup and shutdown during ASGI lifespan."""
625
- global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN # Access globals
626
- logger.info("ASGI Lifespan: Startup initiated...")
627
- # Essential key check already happened globally, but double-check token
628
  if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
629
  try:
630
- ptb_app = await setup_bot_config()
631
- await ptb_app.initialize()
632
- bot_info = await ptb_app.bot.get_me()
633
- logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
634
- # --- Webhook setup (unchanged from previous version) ---
635
  current_webhook_info = await ptb_app.bot.get_webhook_info()
636
  if current_webhook_info and current_webhook_info.url:
637
  logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
@@ -639,81 +465,64 @@ async def lifespan(app: Starlette):
639
  if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
640
  else: logger.warning("Failed delete webhook (API returned False).")
641
  except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
642
- space_host = os.environ.get("SPACE_HOST")
643
- webhook_path = "/webhook"; full_webhook_url = None
644
  if space_host:
645
- protocol = "https://"; host = space_host.split('://')[-1]
646
- full_webhook_url = f"{protocol}{host.rstrip('/')}{webhook_path}"
647
  if full_webhook_url:
648
- logger.info(f"Setting webhook: {full_webhook_url}")
649
- set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
650
  if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
651
  await asyncio.sleep(1.0)
652
  try:
653
- await ptb_app.bot.set_webhook(**set_webhook_args)
654
- webhook_info = await ptb_app.bot.get_webhook_info()
655
  if webhook_info.url == full_webhook_url: logger.info(f"Webhook set: URL='{webhook_info.url}', Secret={bool(WEBHOOK_SECRET)}")
656
  else: logger.error(f"Webhook URL mismatch! Expected '{full_webhook_url}', Got '{webhook_info.url}'"); raise RuntimeError("Webhook URL mismatch.")
657
- await ptb_app.start()
658
- logger.info("PTB Application started (webhook mode).")
659
  except Exception as e: logger.error(f"FATAL: Failed set webhook: {e}", exc_info=True); raise RuntimeError(f"Failed set webhook: {e}") from e
660
  else: logger.critical("Could not construct webhook URL."); raise RuntimeError("Webhook URL undetermined.")
661
  else: logger.critical("SPACE_HOST missing."); raise RuntimeError("SPACE_HOST env var missing.")
662
- # --- End Webhook Setup ---
663
- logger.info("ASGI Lifespan: Startup complete.")
664
- yield # App runs
665
  except Exception as startup_err:
666
  logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
667
  if ptb_app:
668
  if ptb_app.running: await ptb_app.stop()
669
  await ptb_app.shutdown()
670
  raise
671
- finally: # Shutdown
672
  logger.info("ASGI Lifespan: Shutdown initiated...")
673
  if ptb_app:
674
  if ptb_app.running: logger.info("Stopping PTB..."); await ptb_app.stop()
675
- logger.info("Shutting down PTB..."); await ptb_app.shutdown()
676
- logger.info("PTB Application shut down.")
677
  else: logger.info("PTB application not initialized or failed.")
678
  logger.info("ASGI Lifespan: Shutdown complete.")
679
 
 
680
  async def health_check(request: Request) -> PlainTextResponse:
681
- """Basic health check endpoint."""
682
- global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists
683
- bot_status = "Not Initialized"
684
  if ptb_app and ptb_app.bot:
685
  try:
686
  if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
687
  else: bot_status = "Initialized/Not running"
688
  except Exception as e: bot_status = f"Error checking status: {e}"
689
- # Include model/actor info in health check
690
  return PlainTextResponse(f"TG Bot Summarizer - Status: {bot_status}\nModel: {OPENROUTER_MODEL}\nApify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}")
691
 
692
  async def telegram_webhook(request: Request) -> Response:
693
- """Webhook endpoint called by Telegram."""
694
- global WEBHOOK_SECRET # Access global
695
  if not ptb_app: logger.error("Webhook recv but PTB not initialized."); return PlainTextResponse('Bot not initialized', status_code=503)
696
  if not ptb_app.running: logger.warning("Webhook recv but PTB not running."); return PlainTextResponse('Bot not running', status_code=503)
697
  try:
698
- # Secret check (unchanged)
699
  if WEBHOOK_SECRET:
700
  token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
701
  if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook invalid secret. Header: '{token_header}'"); return Response(content="Invalid secret token", status_code=403)
702
- update_data = await request.json()
703
- update = Update.de_json(data=update_data, bot=ptb_app.bot)
704
- logger.debug(f"Processing update_id: {update.update_id} via webhook")
705
- await ptb_app.process_update(update)
706
- return Response(status_code=200) # OK
707
  except json.JSONDecodeError: logger.error("Webhook invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
708
  except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
709
 
710
- # --- Create Starlette ASGI Application (Unchanged) ---
711
- app = Starlette( debug=False, lifespan=lifespan, routes=[
712
- Route("/", endpoint=health_check, methods=["GET"]),
713
- Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
714
  logger.info("Starlette ASGI application created with native routes.")
715
 
716
- # --- Development Server Block (Unchanged) ---
717
  if __name__ == '__main__':
718
  import uvicorn
719
  logger.warning("Running in development mode using Uvicorn directly")
 
1
+ # main.py (Correcting SyntaxError at line 580)
2
  import os
3
  import re
4
  import logging
 
30
  from telegram.request import HTTPXRequest, BaseRequest
31
 
32
  # --- Other Libraries ---
33
+ import httpx
34
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
35
  from bs4 import BeautifulSoup
36
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
 
40
  except ImportError:
41
  DEFAULT_PARSER = 'html.parser'
42
 
 
 
43
  # --- Logging Setup ---
44
  logging.basicConfig(
45
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
46
  level=logging.INFO
47
  )
48
  logging.getLogger("httpx").setLevel(logging.WARNING)
 
49
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
50
  logging.getLogger('telegram.bot').setLevel(logging.INFO)
51
  logging.getLogger("urllib3").setLevel(logging.INFO)
 
69
 
70
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
71
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
72
+ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
73
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
74
+ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
75
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
76
 
 
77
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
78
+ APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo/youtube-transcripts")
79
+
80
+ if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
81
+ if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
82
+
83
+ if not URLTOTEXT_API_KEY: logger.warning("⚠️ WARNING: URLTOTEXT_API_KEY not found.")
84
+ if not SUPADATA_API_KEY: logger.warning("⚠️ WARNING: SUPADATA_API_KEY not found.")
85
+ if not APIFY_API_TOKEN: logger.warning("⚠️ WARNING: APIFY_API_TOKEN not found.")
86
 
87
  logger.info("Secret loading and configuration check finished.")
88
  logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
89
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
90
 
91
+ _apify_token_exists = bool(APIFY_API_TOKEN)
92
 
93
+ # --- Retry Decorator ---
94
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15),
95
  retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
96
  before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
 
103
  except TelegramError as e: logger.warning(f"TelegramError (will retry if applicable): {e}"); raise
104
  except Exception as e: logger.error(f"Unexpected error during bot operation: {e}", exc_info=True); raise
105
 
106
+ # --- Helper Functions ---
107
  def is_youtube_url(url):
108
  youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
109
  match = youtube_regex.search(url); logger.debug(f"is_youtube_url '{url}': {bool(match)}"); return bool(match)
 
113
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
114
  else: logger.warning(f"Could not extract YT ID from {url}"); return None
115
 
116
+ # --- Content Fetching Functions ---
 
 
117
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
118
+ headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
119
  try:
120
  async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
121
  logger.debug(f"[Web Scrape] Sending request to {url}")
 
123
  logger.debug(f"[Web Scrape] Received response {response.status_code} from {url}")
124
  response.raise_for_status()
125
  content_type = response.headers.get('content-type', '').lower()
126
+ if 'html' not in content_type: logger.warning(f"[Web Scrape] Non-HTML content type from {url}: {content_type}"); return None
127
+ try: return response.text
 
 
128
  except Exception as e: logger.error(f"[Web Scrape] Error decoding response for {url}: {e}"); return None
129
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape] HTTP error {e.response.status_code} fetching {url}: {e}")
130
  except httpx.TimeoutException: logger.error(f"[Web Scrape] Timeout error fetching {url}")
131
  except httpx.TooManyRedirects: logger.error(f"[Web Scrape] Too many redirects fetching {url}")
132
+ except httpx.RequestError as e: logger.error(f"[Web Scrape] Request error fetching {url}: {e}")
133
  except Exception as e: logger.error(f"[Web Scrape] Unexpected error fetching {url}: {e}", exc_info=True)
134
  return None
135
 
 
136
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
 
137
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
138
  if not api_key: logger.error("[Supadata] API key missing."); return None
139
  logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
 
140
  api_endpoint = "https://api.supadata.net/v1/youtube/transcript"
141
+ params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
 
142
  try:
143
  async with httpx.AsyncClient(timeout=30.0) as client:
144
  response = await client.get(api_endpoint, headers=headers, params=params)
145
  logger.debug(f"[Supadata] Status code {response.status_code} for {video_id}")
146
  if response.status_code == 200:
147
  try:
 
148
  try: data = response.json()
149
  except json.JSONDecodeError: data = None
150
  content = None
151
  if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
152
+ if not content and response.text: content = response.text
153
+ if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
 
 
154
  else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
155
  except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
156
+ elif response.status_code in [401, 403]: logger.error(f"[Supadata] Auth error ({response.status_code}). Check API key."); return None
157
  elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None
158
  else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
159
  except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
160
+ except httpx.RequestError as e: logger.error(f"[Supadata] Request error for {video_id}: {e}"); return None
161
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
162
 
163
  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
164
+ global APIFY_ACTOR_ID
 
165
  if not video_url: logger.error("[Apify] No video_url provided"); return None
166
  if not api_token: logger.error("[Apify] API token missing."); return None
167
  logger.info(f"[Apify] Attempting fetch via REST for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
168
  api_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
169
  params = {"token": api_token}
170
+ payload = { "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5, "channelHandleBoolean": False, "channelNameBoolean": False, "datePublishedBoolean": False, "relativeDateTextBoolean": False, }
171
  headers = {"Content-Type": "application/json"}
172
  try:
173
+ async with httpx.AsyncClient(timeout=90.0) as client:
174
  logger.debug(f"[Apify] Sending request to run actor {APIFY_ACTOR_ID} synchronously for {video_url}")
175
+ response = await client.post(api_endpoint, headers=headers, params=params, json=payload)
176
  logger.debug(f"[Apify] Received status code {response.status_code} for {video_url}")
177
  if response.status_code == 200:
178
  try:
179
  results = response.json()
180
  if isinstance(results, list) and len(results) > 0:
181
  item = results[0]
 
182
  content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
183
+ if not content and item.get("captions") and isinstance(item["captions"], list): logger.info("[Apify] Processing 'captions' format."); content = " ".join(cap.get("text", "") for cap in item["captions"] if cap.get("text"))
184
+ if content and isinstance(content, str): logger.info(f"[Apify] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
185
  else: logger.warning(f"[Apify] Actor success but transcript empty/not found for {video_url}. Item: {item}"); return None
186
  else: logger.warning(f"[Apify] Actor success but dataset empty for {video_url}. Response: {results}"); return None
187
  except json.JSONDecodeError: logger.error(f"[Apify] Failed JSON decode for {video_url}. Status:{response.status_code}. Resp:{response.text[:200]}"); return None
188
  except Exception as e: logger.error(f"[Apify] Error processing success response for {video_url}: {e}", exc_info=True); return None
189
  elif response.status_code == 400: logger.error(f"[Apify] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
190
+ elif response.status_code == 401: logger.error("[Apify] Auth error (401). Check token."); return None
191
  else: logger.error(f"[Apify] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
192
  except httpx.TimeoutException: logger.error(f"[Apify] Timeout running actor for {video_url}"); return None
193
  except httpx.RequestError as e: logger.error(f"[Apify] Request error running actor for {video_url}: {e}"); return None
194
  except Exception as e: logger.error(f"[Apify] Unexpected error during Apify REST call for {video_url}: {e}", exc_info=True); return None
195
 
196
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
+ global SUPADATA_API_KEY, APIFY_API_TOKEN
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
  transcript_text = None
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
  try:
  transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] )

  if "No transcript found" in str(e): logger.warning(f"[Primary YT] No transcript found.")
  elif "disabled" in str(e): logger.warning(f"[Primary YT] Transcripts disabled.")
  transcript_text = None
  if transcript_text is None:
  logger.info("[Fallback YT 1] Trying Supadata API...")
  if SUPADATA_API_KEY:

  if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
  if transcript_text is None:
  logger.info("[Fallback YT 2] Trying Apify REST API...")
  if APIFY_API_TOKEN:

  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify REST for {video_url}"); return transcript_text
  else: logger.warning(f"[Fallback YT 2] Apify REST failed or no content for {video_url}.")
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
+ return transcript_text

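  # Transcript retrieval order implemented above: youtube-transcript-api first, then the
  # Supadata API, then the Apify actor via REST; the first method that yields text is returned.
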
  async def get_website_content(url: str) -> Optional[str]:
 
  if not url: logger.error("get_website_content: No URL"); return None
  logger.info(f"[Primary Web] Fetching website content for: {url}")
  html_content = await fetch_url_content_for_scrape(url)
  if not html_content: return None
  try:
  def parse_html(content):
  soup = BeautifulSoup(content, DEFAULT_PARSER)
+ for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]): element.extract()
  main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
  target_element = main_content if main_content else soup.body
  if not target_element: logger.warning(f"[Primary Web] Could not find body/main for parsing {url}"); return None

  text = " ".join(lines)
  if not text: logger.warning(f"[Primary Web] Extracted text empty after clean for {url}"); return None
  return text
  text_content = await asyncio.to_thread(parse_html, html_content)
  if text_content: logger.info(f"[Primary Web] Success scrape for {url} (final len: {len(text_content)})"); return text_content
  else: return None
  except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None

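  # parse_html above is synchronous (BeautifulSoup is blocking), so it is run via
  # asyncio.to_thread to keep the event loop responsive while large pages are parsed.
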
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
 
  if not url: logger.error("[Fallback Web API] No URL"); return None
  if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
  logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
  api_endpoint = "https://urltotext.com/api/v1/urltotext/"
  payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
+ headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" }
  try:
+ async with httpx.AsyncClient(timeout=45.0) as client:
  logger.debug(f"[Fallback Web API] Sending request to urltotext.com API for {url}")
  response = await client.post(api_endpoint, headers=headers, json=payload)
  logger.debug(f"[Fallback Web API] Received status {response.status_code} from urltotext.com API for {url}")
  if response.status_code == 200:
  try:
  data = response.json()
+ content = data.get("data", {}).get("content"); credits = data.get("credits_used", "N/A"); warning = data.get("data", {}).get("warning")
  if warning: logger.warning(f"[Fallback Web API] urltotext.com API Warning for {url}: {warning}")
  if content: logger.info(f"[Fallback Web API] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}"); return content.strip()
  else: logger.warning(f"[Fallback Web API] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
  except json.JSONDecodeError: logger.error(f"[Fallback Web API] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
  except Exception as e: logger.error(f"[Fallback Web API] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
  elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[Fallback Web API] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
  else: logger.error(f"[Fallback Web API] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
  except httpx.TimeoutException: logger.error(f"[Fallback Web API] Timeout connecting to urltotext.com API for {url}"); return None
  except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None

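  # The parsing above assumes a urltotext.com response shaped roughly like
  # {"data": {"content": "...", "warning": null}, "credits_used": 1}; the field names come from
  # the .get() calls in this function rather than from the provider's documentation.
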
+ # --- Summarization Function ---
  async def generate_summary(text: str, summary_type: str) -> str:
+ global OPENROUTER_API_KEY, OPENROUTER_MODEL
  logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
  if not OPENROUTER_API_KEY: logger.error("OpenRouter key missing for generate_summary."); return "Error: AI model configuration key missing."
+ if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "• Clear and simple language suitable for someone unfamiliar with the topic.\n" "• Uses British English spellings throughout.\n" "• Straightforward and understandable vocabulary; avoid complex terms.\n" "• Presented as ONE SINGLE PARAGRAPH.\n" "• No more than 85 words maximum; but does not have to be exactly 85.\n" "• Considers the entire text content equally.\n" "• Uses semicolons (;) instead of em dashes (– or —).\n\n" "Here is the text to summarise:")
+ else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "• For each distinct topic or section identified in the text, create a heading.\n" "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "• Immediately following each heading, list the key points as a bulleted list.\n" "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "• The text within each bullet point should NOT contain any bold formatting.\n" "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "• Use British English spellings throughout.\n" "• Avoid overly complex or advanced vocabulary.\n" "• Keep bullet points concise.\n" "• Ensure the entire summary takes no more than two minutes to read.\n" "• Consider the entire text's content, not just the beginning or a few topics.\n" "• Use semicolons (;) instead of em dashes (– or —).\n\n" "Here is the text to summarise:")
  MAX_INPUT_LENGTH = 500000
+ if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
  full_prompt = f"{prompt}\n\n{text}"
+ headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
  try:
+ async with httpx.AsyncClient(timeout=60.0) as client:
+ logger.debug(f"Sending request to OpenRouter ({OPENROUTER_MODEL})..."); response = await client.post(openrouter_api_endpoint, headers=headers, json=payload); logger.debug(f"Received status {response.status_code} from OpenRouter.")
  if response.status_code == 200:
  try:
  data = response.json()

  else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
  except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
  except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
  elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."

  except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, error connecting to AI service."
  except Exception as e: logger.error(f"Unexpected error in generate_summary (OpenRouter): {e}", exc_info=True); return "Sorry, unexpected error generating summary."

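  # Response handling assumes the OpenAI-style schema returned by OpenRouter's chat/completions
  # endpoint, roughly {"choices": [{"message": {"content": "..."}}]}; every failure path returns a
  # user-facing "Error:"/"Sorry," string instead of raising, which the caller checks for.
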
+ # --- Background Task Processing ---
  async def process_summary_task(
  user_id: int, chat_id: int, message_id_to_edit: Optional[int],
+ url: str, summary_type: str, bot_token: str
  ) -> None:
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"
  logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
  # This task outlives the webhook request that scheduled it, so it builds its own Bot with a
  # dedicated HTTPXRequest pool rather than reusing the main application's request object.
+ background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
+ try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
  content = None; user_feedback_message = None; success = False
+ status_message_id = message_id_to_edit; message_to_delete_later_id : Optional[int] = None
  try:
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
  if status_message_id:
+ try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ); logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
+ except Exception as e: logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Sending new."); status_message_id = None
+ if not status_message_id:
  try:
  status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN )
  if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
  else: raise RuntimeError("Failed to send status message after retries.")
+ except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise
  try:
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
+ is_youtube = is_youtube_url(url); logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
  if is_youtube:
  video_id = extract_youtube_id(url)
+ if video_id: content = await get_youtube_transcript(video_id, url)
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
+ if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
+ else:
+ content = await get_website_content(url)
+ if not content:
  logger.warning(f"[Task {task_id}] Primary web scrape failed for {url}. Trying fallback API.")
+ global URLTOTEXT_API_KEY
  if URLTOTEXT_API_KEY:
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
+ content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
  if not content: user_feedback_message = "Sorry, I couldn't fetch content from that website using either method (blocked/inaccessible/empty?)."
+ else: user_feedback_message = "Sorry, I couldn't fetch content from that website (blocked/inaccessible/empty?). The fallback method is not configured."
  if content:
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
  final_summary = await generate_summary(content, summary_type)
+ if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
  else:
  # Telegram caps a single message at 4096 characters, so long summaries are sent in chunks.
+ max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
+ await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
  for part in summary_parts[1:]: await asyncio.sleep(0.5); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
+ success = True; logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts)."); user_feedback_message = None
+ elif user_feedback_message: logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}"); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
  except Exception as e:
+ logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True); user_feedback_message = "Oops! Something went really wrong. Please try again later."
  try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
  except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
+ except Exception as outer_e:
  logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
  try:
  if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred." )
  except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
  finally:
  delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
  if delete_target_id and bot:
+ try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id); logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
  except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
  if background_request and hasattr(background_request, '_client') and background_request._client:
  try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
  except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")

+ # --- Telegram Bot Handlers ---
 
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
  user = update.effective_user
  if not user or not update.message: return
  mention = user.mention_html()

  user = update.effective_user
  if not user or not update.message: return
  logger.info(f"User {user.id} used /help.")
+ help_text = ( "🔍 How to use this bot:\n\n" "1. Send me any YouTube video link or website URL.\n" "2. I'll ask you how you want it summarized (paragraph or points).\n" "3. Click the button for your choice.\n" "4. Wait for the summary!\n\n" "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n" "Commands:\n" "`/start` - Display welcome message\n" "`/help` - Show this help message" )
  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)

  async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
  if not update.message or not update.message.text: return
  url = update.message.text.strip(); user = update.effective_user
  if not user: return
  # Loose sanity check only; anything that merely looks like an http(s) URL is accepted, and the
  # real test is whether the later fetch succeeds.
+ if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]: logger.debug(f"Ignoring non-URL from {user.id}: {url}"); return
  logger.info(f"User {user.id} sent potential URL: {url}")
+ context.user_data['url_to_summarize'] = url; context.user_data['original_message_id'] = update.message.message_id
  keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
  reply_markup = InlineKeyboardMarkup(keyboard)
+ await update.message.reply_text( f"Okay, I see this link:\n{url}\n\nHow would you like it summarized?", reply_markup=reply_markup, disable_web_page_preview=True )

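  # The callback_data values "paragraph" and "points" double as the summary_type string that the
  # callback handler below passes straight through to process_summary_task and generate_summary.
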
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 
  query = update.callback_query
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
  user = query.from_user; summary_type = query.data; query_id = query.id
  try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
  except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
+ url = context.user_data.get('url_to_summarize'); message_id_to_edit = query.message.message_id
  logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
  if not url:
  logger.warning(f"No URL in context for user {user.id} (cb {query_id}).")
+ try:
+ await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
+ # FIX: use a proper nested try/except here; try/except cannot live inside a one-line compound statement.
+ except Exception as e:
+ logger.error(f"Failed edit 'URL not found' msg: {e}")
+ # Fall back to sending a fresh message if editing the original one fails.
+ try:
+ await context.bot.send_message(chat_id=user.id, text="Sorry, context lost. Send link again.")
+ except Exception:
+ pass # Ignore if sending fallback also fails
  return
+ context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
  global TELEGRAM_TOKEN, OPENROUTER_API_KEY
  # Config guards: inform the user and bail out if a required key is missing. These also need
  # real try/except blocks rather than one-liners.
  if not TELEGRAM_TOKEN:
      logger.critical("TG TOKEN missing!")
      try: await query.edit_message_text(text="❌ Bot config error.")
      except Exception: pass
      return
  if not OPENROUTER_API_KEY:
      logger.error("OpenRouter key missing!")
      try: await query.edit_message_text(text="❌ AI config error.")
      except Exception: pass
      return
  logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
+ asyncio.create_task( process_summary_task( user_id=user.id, chat_id=query.message.chat_id, message_id_to_edit=message_id_to_edit, url=url, summary_type=summary_type, bot_token=TELEGRAM_TOKEN ), name=f"SummaryTask-{user.id}-{message_id_to_edit}" )

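  # process_summary_task is scheduled with asyncio.create_task and never awaited here, so any
  # failure inside it must be reported to the user from within the task itself.
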
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
+ ignore_errors = (AttributeError, )
+ if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
  logger.error("Exception while handling an update:", exc_info=context.error)

+ # --- Bot Setup ---
  async def setup_bot_config() -> Application:
+ logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
  custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
  application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
+ application.add_handler(CommandHandler("start", start)); application.add_handler(CommandHandler("help", help_command))
+ application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url)); application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
+ application.add_error_handler(error_handler); logger.info("Telegram application handlers configured."); return application

+ # --- ASGI Lifespan ---
  @contextlib.asynccontextmanager
  async def lifespan(app: Starlette):
+ global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
+ logger.info("ASGI Lifespan: Startup initiated...")
  if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
  try:
+ ptb_app = await setup_bot_config(); await ptb_app.initialize(); bot_info = await ptb_app.bot.get_me(); logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
  current_webhook_info = await ptb_app.bot.get_webhook_info()
  if current_webhook_info and current_webhook_info.url:
  logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")

  if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
  else: logger.warning("Failed delete webhook (API returned False).")
  except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
+ space_host = os.environ.get("SPACE_HOST"); webhook_path = "/webhook"; full_webhook_url = None
  if space_host:
+ protocol = "https://"; host = space_host.split('://')[-1]; full_webhook_url = f"{protocol}{host.rstrip('/')}{webhook_path}"
  if full_webhook_url:
+ logger.info(f"Setting webhook: {full_webhook_url}"); set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
  if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
  await asyncio.sleep(1.0)
  try:
+ await ptb_app.bot.set_webhook(**set_webhook_args); webhook_info = await ptb_app.bot.get_webhook_info()
  if webhook_info.url == full_webhook_url: logger.info(f"Webhook set: URL='{webhook_info.url}', Secret={bool(WEBHOOK_SECRET)}")
  else: logger.error(f"Webhook URL mismatch! Expected '{full_webhook_url}', Got '{webhook_info.url}'"); raise RuntimeError("Webhook URL mismatch.")
+ await ptb_app.start(); logger.info("PTB Application started (webhook mode).")
  except Exception as e: logger.error(f"FATAL: Failed set webhook: {e}", exc_info=True); raise RuntimeError(f"Failed set webhook: {e}") from e
  else: logger.critical("Could not construct webhook URL."); raise RuntimeError("Webhook URL undetermined.")
  else: logger.critical("SPACE_HOST missing."); raise RuntimeError("SPACE_HOST env var missing.")
+ logger.info("ASGI Lifespan: Startup complete."); yield
  except Exception as startup_err:
  logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
  if ptb_app:
  if ptb_app.running: await ptb_app.stop()
  await ptb_app.shutdown()
  raise
+ finally:
  logger.info("ASGI Lifespan: Shutdown initiated...")
  if ptb_app:
  if ptb_app.running: logger.info("Stopping PTB..."); await ptb_app.stop()
+ logger.info("Shutting down PTB..."); await ptb_app.shutdown(); logger.info("PTB Application shut down.")
  else: logger.info("PTB application not initialized or failed.")
  logger.info("ASGI Lifespan: Shutdown complete.")

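  # SPACE_HOST is the hostname Hugging Face Spaces exposes for the running Space; the lifespan
  # above requires it and uses it to build the public https://<host>/webhook URL registered
  # with Telegram.
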
+ # --- Starlette Route Handlers ---
  async def health_check(request: Request) -> PlainTextResponse:
+ global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists; bot_status = "Not Initialized"
  if ptb_app and ptb_app.bot:
  try:
  if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
  else: bot_status = "Initialized/Not running"
  except Exception as e: bot_status = f"Error checking status: {e}"
  return PlainTextResponse(f"TG Bot Summarizer - Status: {bot_status}\nModel: {OPENROUTER_MODEL}\nApify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}")

  async def telegram_webhook(request: Request) -> Response:
+ global WEBHOOK_SECRET
  if not ptb_app: logger.error("Webhook recv but PTB not initialized."); return PlainTextResponse('Bot not initialized', status_code=503)
  if not ptb_app.running: logger.warning("Webhook recv but PTB not running."); return PlainTextResponse('Bot not running', status_code=503)
  try:
  if WEBHOOK_SECRET:
  token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
  if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook invalid secret. Header: '{token_header}'"); return Response(content="Invalid secret token", status_code=403)
+ update_data = await request.json(); update = Update.de_json(data=update_data, bot=ptb_app.bot); logger.debug(f"Processing update_id: {update.update_id} via webhook")
+ await ptb_app.process_update(update); return Response(status_code=200) # OK
  except json.JSONDecodeError: logger.error("Webhook invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
  except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error

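  # Returning 200 even when processing an individual update fails is intentional here:
  # a non-2xx response would presumably make Telegram re-deliver the same update repeatedly.
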
+ # --- Create Starlette ASGI Application ---
+ app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
  logger.info("Starlette ASGI application created with native routes.")

+ # --- Development Server Block ---
  if __name__ == '__main__':
  import uvicorn
  logger.warning("Running in development mode using Uvicorn directly")