fmab777 commited on
Commit
3ac7b5f
·
verified ·
1 Parent(s): 2ebc056

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +502 -768
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Applying fixes for apparent_encoding, bot cleanup, and Apify actor name)
2
  import os
3
  import re
4
  import logging
@@ -27,32 +27,20 @@ from telegram.ext import (
27
  )
28
  from telegram.constants import ParseMode
29
  from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError
30
- from telegram.request import HTTPXRequest, BaseRequest # Import BaseRequest for type hinting
31
 
32
  # --- Other Libraries ---
33
- import httpx
34
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
35
- # Make requests optional if only used for sync fallback (currently not)
36
- # import requests
37
  from bs4 import BeautifulSoup
38
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
39
- # Optional: Import lxml if installed (usually faster parsing)
40
  try:
41
  import lxml
42
  DEFAULT_PARSER = 'lxml'
43
  except ImportError:
44
  DEFAULT_PARSER = 'html.parser'
45
 
46
-
47
- _apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
48
- if _apify_token_exists:
49
- from apify_client import ApifyClient
50
- from apify_client.consts import ActorJobStatus
51
- from apify_client.errors import ApifyApiError # Import specific error
52
- else:
53
- ApifyClient = None # type: ignore
54
- ApifyApiError = None # type: ignore
55
-
56
 
57
  # --- Logging Setup ---
58
  logging.basicConfig(
@@ -60,7 +48,7 @@ logging.basicConfig(
60
  level=logging.INFO
61
  )
62
  logging.getLogger("httpx").setLevel(logging.WARNING)
63
- if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
64
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
65
  logging.getLogger('telegram.bot').setLevel(logging.INFO)
66
  logging.getLogger("urllib3").setLevel(logging.INFO)
@@ -73,11 +61,10 @@ logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
73
  # --- Global variable for PTB app ---
74
  ptb_app: Optional[Application] = None
75
 
76
- # --- Environment Variable Loading ---
77
- logger.info("Attempting to load secrets...")
78
  def get_secret(secret_name):
79
  value = os.environ.get(secret_name)
80
- # Avoid logging full length of very long secrets like Supabase keys
81
  log_length = min(len(value), 8) if value else 0
82
  status = "Found" if value else "Not Found"
83
  logger.info(f"Secret '{secret_name}': {status} (Value starts with: {value[:log_length]}...)")
@@ -85,904 +72,651 @@ def get_secret(secret_name):
85
 
86
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
87
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
88
- URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
89
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
90
- APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
91
- WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET') # Added for webhook security
92
- OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "anthropic/claude-3.5-sonnet")
93
- APIFY_ACTOR_NAME = os.environ.get("APIFY_ACTOR_NAME", "pocesar/youtube-scraper") # Use env var or default
94
- logger.info("Secret loading attempt finished.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
96
- if _apify_token_exists:
97
- logger.info(f"Using Apify Actor: {APIFY_ACTOR_NAME}")
98
 
 
99
 
100
- # --- Retry Decorator for Bot Operations ---
101
- @retry(
102
- stop=stop_after_attempt(4),
103
- wait=wait_exponential(multiplier=1, min=2, max=15),
104
- retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
105
- before_sleep=before_sleep_log(logger, logging.WARNING),
106
- reraise=True
107
- )
108
  async def retry_bot_operation(func, *args, **kwargs):
109
- """Wrapper to retry bot operations with exponential backoff."""
110
- try:
111
- return await func(*args, **kwargs)
112
  except BadRequest as e:
113
- # Added specific check for common, non-fatal BadRequests
114
- ignore_errors = [
115
- "message is not modified",
116
- "query is too old",
117
- "message to edit not found",
118
- "chat not found", # Might indicate user blocked bot, non-retryable
119
- "bot was blocked by the user",
120
- ]
121
- if any(err in str(e).lower() for err in ignore_errors):
122
- logger.warning(f"Ignoring non-critical BadRequest during bot operation: {e}")
123
- return None # Indicate no action needed or failed benignly
124
- logger.error(f"Potentially critical BadRequest during bot operation: {e}")
125
- raise # Reraise other BadRequests (might be retryable by tenacity)
126
- except TelegramError as e:
127
- logger.warning(f"TelegramError during bot operation (will retry if applicable): {e}")
128
- raise
129
- except Exception as e:
130
- logger.error(f"Unexpected error during bot operation: {e}", exc_info=True)
131
- raise
132
 
133
-
134
- # --- Helper Functions ---
135
  def is_youtube_url(url):
136
- """Checks if the URL is a valid YouTube video or shorts URL."""
137
- youtube_regex = re.compile(
138
- r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
139
- r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
140
- r'([\w-]{11})'
141
- r'(?:\S+)?',
142
- re.IGNORECASE)
143
- match = youtube_regex.search(url)
144
- logger.debug(f"is_youtube_url check for '{url}': {'Match found' if match else 'No match'}")
145
- return bool(match)
146
-
147
  def extract_youtube_id(url):
148
- """Extracts the YouTube video ID from a URL."""
149
- youtube_regex = re.compile(
150
- r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
151
- r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
152
- r'([\w-]{11})'
153
- r'(?:\S+)?',
154
- re.IGNORECASE)
155
  match = youtube_regex.search(url)
156
- if match:
157
- video_id = match.group(1)
158
- logger.debug(f"Extracted YouTube ID '{video_id}' from URL: {url}")
159
- return video_id
160
- else:
161
- logger.warning(f"Could not extract YouTube ID from URL: {url}")
162
- return None
163
-
164
- # --- Content Fetching Functions ---
165
-
166
- # Using httpx for async requests
167
- async def fetch_url_content(url: str, timeout: int = 20) -> Optional[str]:
168
- """Fetches content from a URL using httpx asynchronously."""
169
- headers = {
170
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', # Updated UA
171
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
172
- 'Accept-Language': 'en-US,en;q=0.9',
173
- 'Connection': 'keep-alive',
174
  }
175
  try:
176
- async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers, http2=True) as client: # Enable HTTP/2
 
177
  response = await client.get(url)
 
178
  response.raise_for_status()
179
- # *** FIX: Use response.encoding or response.charset_encoding ***
180
- # response.encoding will try to decode based on headers/content
181
- # If it fails, default to utf-8
182
- try:
183
- # Accessing .text forces encoding detection
184
- content = response.text
185
- logger.debug(f"Detected encoding for {url}: {response.encoding}")
186
- return content
187
- except UnicodeDecodeError:
188
- logger.warning(f"UnicodeDecodeError for {url} with encoding {response.encoding}. Trying raw bytes with utf-8.")
189
- # Fallback: read bytes and decode utf-8 ignoring errors
190
- return response.content.decode('utf-8', errors='ignore')
191
- except Exception as e:
192
- logger.error(f"Error decoding response for {url}: {e}")
193
- return None # Cannot decode reliably
194
-
195
- except httpx.HTTPStatusError as e:
196
- logger.error(f"HTTP error fetching {url}: {e.response.status_code} - {e}")
197
- except httpx.ConnectError as e:
198
- # Catch specific connection errors like DNS failures
199
- logger.error(f"Connection error fetching {url}: {e}")
200
- except httpx.TimeoutException as e:
201
- logger.error(f"Timeout error fetching {url}: {e}")
202
- except httpx.RequestError as e:
203
- logger.error(f"Request error fetching {url}: {e}")
204
- except Exception as e:
205
- logger.error(f"Unexpected error fetching {url}: {e}", exc_info=True)
206
  return None
207
 
208
-
209
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
210
- """Fetches YouTube transcript using Supadata API."""
211
- if not api_key: return None
212
- api_url = f"https://api.supadata.net/youtube/transcript?video_id={video_id}"
213
- headers = {'X-API-Key': api_key, 'Accept': 'application/json'}
214
- logger.info(f"Attempting transcript fetch via Supadata for {video_id}")
 
 
 
215
  try:
216
- # Note: If CERTIFICATE_VERIFY_FAILED persists, it's an issue with api.supadata.net's cert.
217
- # Do NOT disable verification (verify=False) unless absolutely necessary and understood.
218
  async with httpx.AsyncClient(timeout=30.0) as client:
219
- response = await client.get(api_url, headers=headers)
220
- response.raise_for_status()
221
- data = response.json()
222
- if data and isinstance(data, list) and data[0].get("text"):
223
- transcript = " ".join([item["text"] for item in data if "text" in item])
224
- logger.info(f"Supadata transcript fetched successfully for {video_id} (length: {len(transcript)})")
225
- return transcript
226
- else:
227
- logger.warning(f"Supadata response format unexpected or empty for {video_id}: {data}")
228
- return None
229
- except httpx.ConnectError as e:
230
- # Log SSL verification errors specifically if they occur
231
- if "CERTIFICATE_VERIFY_FAILED" in str(e):
232
- logger.error(f"Supadata API SSL certificate verification failed for {video_id}: {e}. This is likely an issue with api.supadata.net's certificate.")
233
- else:
234
- logger.error(f"Supadata API connection error for {video_id}: {e}")
235
- except httpx.HTTPStatusError as e:
236
- logger.error(f"Supadata API HTTP error for {video_id}: {e.response.status_code} - {e}")
237
- except Exception as e:
238
- logger.error(f"Error fetching transcript via Supadata for {video_id}: {e}", exc_info=True)
239
- return None
240
-
241
- async def get_transcript_via_apify(video_id: str, api_token: str) -> Optional[str]:
242
- """Fetches YouTube transcript using Apify YouTube Scraper Actor."""
243
- global APIFY_ACTOR_NAME # Use the globally configured/default actor name
244
- if not ApifyClient or not api_token: return None
245
- logger.info(f"Attempting transcript fetch via Apify (Actor: {APIFY_ACTOR_NAME}) for {video_id}")
 
 
 
 
 
 
 
 
 
 
 
 
246
  try:
247
- client = ApifyClient(api_token)
248
- # *** FIX: Use the correct actor name ***
249
- actor = client.actor(APIFY_ACTOR_NAME)
250
- if not actor:
251
- logger.error(f"Could not find Apify actor: {APIFY_ACTOR_NAME}")
252
- return None
253
-
254
- actor_run = await asyncio.to_thread(
255
- actor.call, # Run blocking call in thread
256
- run_input={
257
- "startUrls": [{"url": f"https://www.youtube.com/watch?v={video_id}"}], # Use correct input format if needed
258
- "maxResultStreams": 0,
259
- "maxResults": 1, # Only need info for one video
260
- "maxResultCommentStreams": 0,
261
- "proxyConfiguration": {"useApifyProxy": True},
262
- "subtitles": True, # Explicitly request subtitles/transcript
263
- "maxDurationMinutes": 0, # No duration limit
264
- "skipComments": True,
265
- # Check actor docs for exact input schema
266
- },
267
- timeout_secs=120, # Timeout for the call itself
268
- wait_secs=120 # Timeout for waiting for run completion
269
- )
270
-
271
- if not actor_run or 'defaultDatasetId' not in actor_run:
272
- logger.warning(f"Apify actor run did not return expected dataset ID for {video_id}. Run details: {actor_run}")
273
- return None
274
-
275
- logger.info(f"Apify actor run started/retrieved for {video_id}. Dataset ID: {actor_run['defaultDatasetId']}")
276
-
277
- # Fetch results from the dataset
278
- dataset = client.dataset(actor_run["defaultDatasetId"])
279
- # Run list_items in thread as it can be blocking I/O
280
- dataset_page = await asyncio.to_thread(dataset.list_items, limit=5) # Limit items fetched initially
281
-
282
- if dataset_page and dataset_page.items:
283
- for item in dataset_page.items:
284
- # Apify output structure can vary; adapt as needed
285
- transcript_text = item.get('transcript') # Check common keys
286
- if not transcript_text and 'subtitles' in item: # Check alternative
287
- if isinstance(item['subtitles'], list) and len(item['subtitles']) > 0:
288
- transcript_text = " ".join(line.get('text', '') for line in item['subtitles'][0].get('lines', []))
289
- elif isinstance(item['subtitles'], str): # Sometimes it's just a string
290
- transcript_text = item['subtitles']
291
-
292
- if transcript_text and isinstance(transcript_text, str) and transcript_text.strip():
293
- logger.info(f"Apify transcript fetched successfully for {video_id} (length: {len(transcript_text)})")
294
- return transcript_text.strip()
295
-
296
- logger.warning(f"Apify run completed for {video_id}, but no transcript found in dataset items.")
297
- else:
298
- logger.warning(f"Apify run completed for {video_id}, but dataset was empty or inaccessible.")
299
-
300
- except ApifyApiError as e:
301
- # Catch specific Apify errors like "Actor not found"
302
- logger.error(f"Apify API error fetching transcript for {video_id} (Actor: {APIFY_ACTOR_NAME}): {e}")
303
- except Exception as e:
304
- logger.error(f"Unexpected error fetching transcript via Apify for {video_id}: {e}", exc_info=True)
305
- return None
306
-
307
-
308
- async def get_youtube_transcript(video_id: str, url: str, supadata_key: Optional[str], apify_token: Optional[str]) -> Optional[str]:
309
- """Tries different methods to get a YouTube transcript."""
310
- transcript = None
311
-
312
- # 1. Try Supadata API (if key exists)
313
- if supadata_key:
314
- transcript = await get_transcript_via_supadata(video_id, supadata_key)
315
- if transcript: return transcript
316
-
317
- # 2. Try youtube-transcript-api (Direct method)
318
- logger.info(f"Attempting transcript fetch via youtube-transcript-api for {video_id}")
319
  try:
320
- transcript_list = await asyncio.to_thread(YouTubeTranscriptApi.get_transcript, video_id)
321
- transcript = " ".join([item['text'] for item in transcript_list])
322
- logger.info(f"youtube-transcript-api transcript fetched successfully for {video_id} (length: {len(transcript)})")
323
- return transcript
324
- except (TranscriptsDisabled, NoTranscriptFound):
325
- logger.warning(f"Transcripts disabled or unavailable via youtube-transcript-api for {video_id}.")
326
  except Exception as e:
327
- logger.error(f"Error using youtube-transcript-api for {video_id}: {e}")
328
-
329
- # 3. Try Apify (if token exists and other methods failed)
330
- if not transcript and apify_token:
331
- transcript = await get_transcript_via_apify(video_id, apify_token)
332
- if transcript: return transcript
333
-
334
- logger.warning(f"Failed to retrieve transcript for YouTube video {video_id} using all available methods.")
335
- return None
336
-
337
- async def get_website_content_via_requests(url: str) -> Optional[str]:
338
- """Fetches and extracts main text content from a website using BeautifulSoup."""
339
- logger.info(f"Attempting website scrape via requests/BeautifulSoup for: {url}")
340
- html_content = await fetch_url_content(url)
341
- if not html_content:
342
- return None
343
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  try:
345
  def parse_html(content):
346
  # Use lxml if available, otherwise html.parser
347
  soup = BeautifulSoup(content, DEFAULT_PARSER)
348
- for script_or_style in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "iframe"]):
349
- script_or_style.decompose()
350
- # Consider targeting specific elements like <article>, <main>, .post-content etc.
351
- main_content = soup.find('article') or soup.find('main') or soup.body
352
- if not main_content: main_content = soup # Fallback to whole soup if no main tags
353
-
354
- text = main_content.get_text(separator='\n', strip=True)
355
- lines = (line.strip() for line in text.splitlines())
356
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
357
- text = '\n'.join(chunk for chunk in chunks if chunk)
358
  return text
359
 
360
  text_content = await asyncio.to_thread(parse_html, html_content)
361
-
362
- if text_content and len(text_content) > 100:
363
- logger.info(f"Successfully scraped content via requests/BeautifulSoup for {url} (length: {len(text_content)})")
364
- return text_content
365
- else:
366
- logger.warning(f"Scraping via requests/BeautifulSoup for {url} yielded minimal content (length: {len(text_content) if text_content else 0}).")
367
- return None
368
- except Exception as e:
369
- logger.error(f"Error parsing website content with BeautifulSoup for {url}: {e}", exc_info=True)
370
- return None
371
-
372
- async def get_website_content_via_urltotext_api(url: str, api_key: str) -> Optional[str]:
373
- """Fetches website content using the UrlToText API."""
374
- if not api_key: return None
375
- api_endpoint = "https://api.urltotext.ai/text"
376
- headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
377
- payload = {"url": url, "text_only": True}
378
- logger.info(f"Attempting website content fetch via UrlToText API for: {url}")
379
-
380
  try:
381
- async with httpx.AsyncClient(timeout=45.0) as client:
 
382
  response = await client.post(api_endpoint, headers=headers, json=payload)
383
- response.raise_for_status()
384
- data = response.json()
385
- if "text" in data and data["text"]:
386
- content = data["text"]
387
- logger.info(f"Successfully fetched content via UrlToText API for {url} (length: {len(content)})")
388
- return content
389
- else:
390
- logger.warning(f"UrlToText API response did not contain text for {url}. Response: {data}")
391
- return None
392
- except httpx.ConnectError as e:
393
- # Catch DNS error specifically if needed, but general ConnectError covers it
394
- logger.error(f"UrlToText API connection error for {url}: {e}. Check network/DNS.")
395
- except httpx.HTTPStatusError as e:
396
- logger.error(f"UrlToText API HTTP error for {url}: {e.response.status_code} - {e}")
397
- except Exception as e:
398
- logger.error(f"Error fetching content via UrlToText API for {url}: {e}", exc_info=True)
399
- return None
400
-
401
- # --- Summarization Function ---
402
- async def generate_summary(content: str, summary_type: str, api_key: Optional[str]) -> str:
403
- """Generates a summary using OpenRouter API."""
404
- global OPENROUTER_MODEL # Use the globally configured/default model
405
- if not api_key:
406
- return "Error: OpenRouter API key is not configured."
407
- if not content:
408
- return "Error: No content provided to summarize."
409
-
410
- if len(content) < 50:
411
- return "The provided content is too short to summarize effectively."
412
-
413
- max_chars = 100000
414
- if len(content) > max_chars:
415
- logger.warning(f"Content length ({len(content)}) exceeds max_chars ({max_chars}), truncating.")
416
- content = content[:max_chars]
417
-
418
- prompt_template = """
419
- Please summarize the following text. The summary should capture the key points and main ideas accurately and concisely.
420
- Provide the summary in {format_style} format.
421
-
422
- Text to summarize:
423
- ---
424
- {text}
425
- ---
426
-
427
- Summary ({format_style}):
428
- """
429
- format_style = "a concise paragraph" if summary_type == "paragraph" else "bullet points (using * or - for each point)"
430
- prompt = prompt_template.format(text=content, format_style=format_style)
431
-
432
- logger.info(f"Sending request to OpenRouter (Model: {OPENROUTER_MODEL}) for {summary_type} summary.")
 
 
 
 
 
 
 
 
 
 
 
 
 
433
 
434
  try:
435
- async with httpx.AsyncClient(timeout=120.0) as client:
436
- response = await client.post(
437
- url="https://openrouter.ai/api/v1/chat/completions",
438
- headers={
439
- "Authorization": f"Bearer {api_key}",
440
- "Content-Type": "application/json",
441
- # Optional: Add custom site identifier
442
- # "HTTP-Referer": "YOUR_SITE_URL",
443
- # "X-Title": "Telegram Summarizer Bot"
444
- },
445
- json={
446
- "model": OPENROUTER_MODEL,
447
- "messages": [{"role": "user", "content": prompt}],
448
- "max_tokens": 1024, # Adjust based on expected summary length
449
- # Optional: Add temperature, top_p etc. if needed
450
- },
451
- )
452
- response.raise_for_status()
453
- data = response.json()
454
-
455
- if data.get("choices") and len(data["choices"]) > 0:
456
- summary = data["choices"][0].get("message", {}).get("content", "").strip()
457
- if summary:
458
- logger.info(f"Summary generated successfully (length: {len(summary)})")
459
- # More robust Markdown escaping needed for PTB's MarkdownV2
460
- # For simple Markdown, basic escaping might suffice, but V2 is safer
461
- # summary = escape_markdown(summary) # Implement or import escape_markdown if using V2
462
- # Basic escaping for ParseMode.MARKDOWN:
463
- summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
464
- return summary
465
- else:
466
- logger.error("OpenRouter response successful, but summary content is empty.")
467
- return "Sorry, the AI generated an empty summary. Please try again."
468
- else:
469
- # Log the error details if available in the response
470
- error_details = data.get("error")
471
- logger.error(f"OpenRouter response format unexpected or error: {error_details or data}")
472
- return f"Sorry, I received an unexpected response or error from the summarization service: {error_details}"
473
-
474
- except httpx.HTTPStatusError as e:
475
- error_body = ""
476
- try: error_body = e.response.text
477
- except Exception: pass
478
- logger.error(f"OpenRouter API HTTP error: {e.response.status_code} - {e}. Response body: {error_body}")
479
- return f"Sorry, there was an error communicating with the summarization service (HTTP {e.response.status_code})."
480
- except Exception as e:
481
- logger.error(f"Error generating summary via OpenRouter: {e}", exc_info=True)
482
- return "Sorry, an unexpected error occurred while generating the summary."
483
-
484
-
485
- # --- Background Task Processing ---
486
  async def process_summary_task(
487
- user_id: int,
488
- chat_id: int,
489
- message_id_to_edit: Optional[int],
490
- url: str,
491
- summary_type: str,
492
- bot_token: str
493
  ) -> None:
494
- """Handles the actual fetching and summarization in a background task."""
495
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"
496
  logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
497
-
498
- # Create a new request handler and bot instance for this task
499
  background_request: Optional[BaseRequest] = None
500
  bot: Optional[Bot] = None
501
- try:
502
- background_request = HTTPXRequest(
503
- connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0, http_version="1.1"
504
- )
505
  bot = Bot(token=bot_token, request=background_request)
506
- except Exception as e:
507
- logger.critical(f"[Task {task_id}] Failed to create background bot instance: {e}", exc_info=True)
508
- # Cannot proceed without a bot instance
509
- return # Or raise? Silently failing might hide issues.
510
-
511
- content = None
512
- user_feedback_message = None
513
- success = False
514
- final_summary = ""
515
  status_message_id = message_id_to_edit
 
516
 
517
  try:
518
- # --- Inform User Processing Has Started ---
519
- processing_message_text = f" Working on your '{summary_type}' summary for:\n`{url}`\n\n_(Fetching & summarizing...)_"
520
  if status_message_id:
521
  try:
522
- await retry_bot_operation(
523
- bot.edit_message_text, chat_id=chat_id, message_id=status_message_id,
524
- text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None
525
- )
526
- logger.debug(f"[Task {task_id}] Successfully edited message {status_message_id} to 'Processing'")
527
  except Exception as e:
528
- logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Will send a new status message.")
529
- status_message_id = None
530
- if not status_message_id:
531
  try:
532
- status_message = await retry_bot_operation(
533
- bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN
534
- )
535
- if status_message: # Check if message was actually sent (retry might return None on ignore)
536
- status_message_id = status_message.message_id
537
- logger.debug(f"[Task {task_id}] Sent new status message {status_message_id}")
538
- else:
539
- logger.error(f"[Task {task_id}] Failed to send new status message after retries.")
540
- raise RuntimeError("Failed to send initial status message")
541
- except Exception as e:
542
- logger.error(f"[Task {task_id}] Failed to send new status message: {e}")
543
- raise RuntimeError("Failed to send initial status message") from e
544
-
545
- # --- Main Content Fetching and Summarization ---
546
  try:
547
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
548
- is_yt = is_youtube_url(url)
549
- logger.debug(f"[Task {task_id}] URL is YouTube: {is_yt}")
550
- if is_yt:
 
551
  video_id = extract_youtube_id(url)
552
- if video_id:
553
- logger.info(f"[Task {task_id}] Fetching YouTube transcript for {video_id}")
554
- content = await get_youtube_transcript(video_id, url, SUPADATA_API_KEY, APIFY_API_TOKEN)
555
- if not content: user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be disabled or unavailable."
556
- else: user_feedback_message = "⚠️ Couldn't extract a valid YouTube video ID from the link."
557
- else:
558
- logger.info(f"[Task {task_id}] Attempting website scrape for: {url}")
559
- content = await get_website_content_via_requests(url)
560
- if not content and URLTOTEXT_API_KEY:
561
- logger.info(f"[Task {task_id}] Basic scrape failed/insufficient, trying UrlToText API...")
562
- await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
563
- content = await get_website_content_via_urltotext_api(url, URLTOTEXT_API_KEY)
564
- if not content: user_feedback_message = "⚠️ Sorry, I couldn't fetch or extract meaningful content from that website."
 
 
565
 
 
566
  if content:
567
- logger.info(f"[Task {task_id}] Content fetched (length: {len(content)}). Generating '{summary_type}' summary.")
568
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
569
- final_summary = await generate_summary(content, summary_type, OPENROUTER_API_KEY)
 
570
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
571
- user_feedback_message = f"⚠️ {final_summary}"
572
- else: success = True
573
- # If content fetching failed, user_feedback_message is already set
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
 
575
  except Exception as e:
576
- logger.error(f"[Task {task_id}] Error during content fetching or summarization: {e}", exc_info=True)
577
- user_feedback_message = " An unexpected error occurred while processing your request."
578
-
579
- # --- Send Final Result or Error ---
580
- if success and final_summary:
581
- max_length = 4096
582
- summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
583
- await retry_bot_operation(
584
- bot.send_message, chat_id=chat_id, text=summary_parts[0],
585
- parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True}
586
- )
587
- for part in summary_parts[1:]:
588
- await asyncio.sleep(0.5)
589
- await retry_bot_operation(
590
- bot.send_message, chat_id=chat_id, text=part,
591
- parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True}
592
- )
593
- logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
594
- elif user_feedback_message:
595
- logger.warning(f"[Task {task_id}] Sending feedback/error message: {user_feedback_message}")
596
- await retry_bot_operation(
597
- bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True}
598
- )
599
- else:
600
- logger.error(f"[Task {task_id}] Reached end of task without success or specific error message.")
601
- await retry_bot_operation(
602
- bot.send_message, chat_id=chat_id, text="❓ Something went wrong, but no specific error was identified.",
603
- link_preview_options={'is_disabled': True}
604
- )
605
-
606
- except Exception as e:
607
- logger.critical(f"[Task {task_id}] Critical error within task processing: {e}", exc_info=True)
608
  try:
609
- # Use the bot instance created at the start of the task if available
610
- if bot:
611
- await retry_bot_operation(
612
- bot.send_message, chat_id=chat_id,
613
- text="❌ A critical internal error occurred. Please report this if it persists."
614
- )
615
- else:
616
- logger.error("[Task ??] Cannot send critical error message: Bot instance not available.")
617
- except Exception:
618
- logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
619
  finally:
620
- # --- Clean up Status Message ---
621
- if status_message_id and bot: # Ensure bot exists before trying to delete
 
622
  try:
623
- await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=status_message_id)
624
- logger.debug(f"[Task {task_id}] Deleted status message {status_message_id}")
625
- except Exception as e:
626
- # Log benignly if deletion fails (e.g., message already deleted)
627
- logger.warning(f"[Task {task_id}] Failed to delete status message {status_message_id}: {e}")
628
-
629
- # --- Clean up Background Bot's HTTPX Client ---
630
- # *** FIX: Correct way to close client for manually created Bot ***
631
  if background_request and hasattr(background_request, '_client') and background_request._client:
632
- try:
633
- await background_request._client.aclose()
634
- logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
635
- except Exception as e:
636
- logger.warning(f"[Task {task_id}] Error closing background bot's HTTPX client: {e}")
637
- else:
638
- logger.debug(f"[Task {task_id}] Background bot's HTTPX client already closed or not found.")
639
-
640
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
641
 
642
 
643
- # --- Telegram Bot Handlers (Mostly Unchanged) ---
644
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
645
- """Handles the /start command."""
646
- user = update.effective_user
647
  if not user or not update.message: return
648
- logger.info(f"User {user.id} initiated /start.")
649
- mention = user.mention_html()
650
- start_message = (
651
- f"�� Hello {mention}!\n\n"
652
- "I can summarise YouTube videos or web articles for you.\n\n"
653
- "Just send me a link (URL) and I'll ask you whether you want the summary as a paragraph or bullet points.\n\n"
654
- "Type /help for more details."
655
- )
656
- await update.message.reply_html(start_message)
657
 
658
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /help: send usage instructions, including the active model name."""
    user = update.effective_user
    if not user or not update.message:
        return
    logger.info(f"User {user.id} requested /help.")
    # Build the template first, then substitute the configured model name.
    template = (
        "**How to Use Me:**\n"
        "1. Send me a direct link (URL) to a YouTube video or a web article.\n"
        "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n"
        "3. Click the button for your preferred format.\n"
        "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
        "**Important Notes:**\n"
        "- **YouTube:** Transcript availability varies. I try multiple methods.\n"
        "- **Websites:** I attempt basic scraping and can use UrlToText API (if configured) for complex sites.\n"
        "- **AI Summaries:** Provided by OpenRouter (using model: `{model}`). Accuracy may vary.\n"
        "- **Length Limits:** Very long content might be truncated.\n\n"
        "Just send a link to get started!"
    )
    help_text = template.format(model=OPENROUTER_MODEL)
    await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
677
 
678
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle plain-text messages that may contain a URL.

    If a URL is found, it is stashed in ``user_data`` and the user is asked to
    pick a summary format via inline buttons. Near-miss link-like text gets a
    gentle correction hint; anything else is ignored.
    """
    if not update.message or not update.message.text:
        return
    user = update.effective_user
    if not user:
        return
    message_text = update.message.text.strip()

    url_match = re.search(r'https?://[^\s<>"]+|www\.[^\s<>"]+', message_text)

    if url_match is None:
        # Commands are handled elsewhere; only nudge on link-like text.
        if message_text.startswith('/'):
            return
        logger.debug(f"User {user.id} sent non-URL, non-command text: '{message_text[:50]}...'")
        looks_linkish = any(tok in message_text for tok in ("http", "www.", ".com", ".org", ".net"))
        if looks_linkish:
            await update.message.reply_text("Hmm, that looks like it might be a link, but please ensure it starts with `http://` or `https://` and is a valid URL.")
        return

    # Strip trailing punctuation that commonly rides along with pasted links.
    url = re.sub(r'[.,!?)\]>]+$', '', url_match.group(0))
    logger.info(f"User {user.id} sent potential URL: {url}")

    # Remember the URL so the button callback can retrieve it later.
    context.user_data['url_to_summarize'] = url
    context.user_data['original_message_id'] = update.message.message_id

    format_buttons = InlineKeyboardMarkup([[
        InlineKeyboardButton("📜 Paragraph", callback_data="paragraph"),
        InlineKeyboardButton("🔹 Bullet Points", callback_data="points"),
    ]])
    await update.message.reply_text(
        f"✅ Link received:\n`{url}`\n\nChoose your desired summary format:",
        reply_markup=format_buttons,
        parse_mode=ParseMode.MARKDOWN,
        link_preview_options={'is_disabled': True},
    )
715
 
716
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle the Paragraph/Points button press and launch the summary task.

    Acknowledges the callback, retrieves the stashed URL from ``user_data``,
    and schedules ``process_summary_task`` in the background so the webhook
    handler can return immediately.
    """
    query = update.callback_query
    if not query or not query.message or not query.from_user:
        logger.warning("Callback query received without essential data.")
        if query:
            await query.answer()
        return

    user = query.from_user
    summary_type = query.data
    query_id = query.id

    # Acknowledge quickly so Telegram stops showing the loading spinner;
    # failure here is logged but must not abort processing.
    try:
        await query.answer()
        logger.debug(f"Acknowledged callback query {query_id} from user {user.id}")
    except Exception as e:
        logger.error(f"Error answering callback query {query_id} from user {user.id}: {e}", exc_info=True)

    url = context.user_data.get('url_to_summarize')
    message_id_to_edit = query.message.message_id
    logger.info(f"User {user.id} chose summary type '{summary_type}' for URL associated with message {message_id_to_edit}")

    if not url:
        # user_data was lost (restart, expired state) — tell the user to resend.
        logger.warning(f"No URL found in user_data for user {user.id} (callback query {query_id}). Editing message.")
        try:
            await query.edit_message_text(text="⚠️ Oops! I couldn't find the link associated with this request. Please send the link again.")
        except Exception as e:
            logger.error(f"Failed to edit message to show 'URL not found' error: {e}")
        return

    # Consume the stored state so a stale button press can't reuse it.
    context.user_data.pop('url_to_summarize', None)
    context.user_data.pop('original_message_id', None)

    if not TELEGRAM_TOKEN:
        logger.critical("TELEGRAM_TOKEN is missing, cannot start background task!")
        try:
            await query.edit_message_text(text="❌ Internal configuration error. Cannot process request.")
        except Exception:
            pass
        return

    logger.info(f"Scheduling background task for user {user.id}, chat {query.message.chat_id}, message {message_id_to_edit}, type {summary_type}")
    asyncio.create_task(
        process_summary_task(
            user_id=user.id,
            chat_id=query.message.chat_id,
            message_id_to_edit=message_id_to_edit,
            url=url,
            summary_type=summary_type,
            bot_token=TELEGRAM_TOKEN,
        ),
        name=f"SummaryTask-{user.id}-{message_id_to_edit}",
    )
 
 
 
 
 
771
 
 
772
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Log Errors caused by Updates.

    Known, already-handled cleanup errors from the background bot shutdown
    ("'Bot' object has no attribute 'session'") are ignored at DEBUG level;
    everything else is logged with its traceback.

    Fix: the diffed version left an unconditional bare ``return`` before the
    ``logger.error`` call, so real exceptions were silently dropped. The early
    return is now conditional on the known-cleanup-error check only.
    """
    if isinstance(context.error, AttributeError) and "'Bot' object has no attribute 'session'" in str(context.error):
        logger.debug(f"Ignoring known cleanup error in error_handler: {context.error}")
        return

    logger.error("Exception while handling an update:", exc_info=context.error)
-
793
- # --- Bot Setup Function ---
794
async def setup_bot_config() -> Application:
    """Configures the PTB Application."""
    logger.info("Configuring Telegram Application...")
    if not TELEGRAM_TOKEN:
        raise ValueError("TELEGRAM_TOKEN environment variable not set.")

    # Custom HTTPX transport with generous read/pool timeouts for webhook mode.
    custom_request = HTTPXRequest(
        connect_timeout=10.0,
        read_timeout=30.0,
        write_timeout=30.0,
        pool_timeout=60.0,
        http_version="1.1",
    )

    builder = Application.builder()
    application = builder.token(TELEGRAM_TOKEN).request(custom_request).build()

    # Register command, message, callback and error handlers.
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
    application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    application.add_error_handler(error_handler)

    logger.info("Telegram application handlers configured.")
    return application
819
 
820
- # --- ASGI Lifespan Context Manager ---
821
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
    """Handles PTB startup and shutdown during ASGI lifespan.

    Startup: build and initialize the PTB application, clear any stale
    webhook, register a new webhook derived from SPACE_HOST (with an
    optional secret token), and start the application. Shutdown: stop and
    shut down PTB gracefully.
    """
    global ptb_app, WEBHOOK_SECRET
    logger.info("ASGI Lifespan: Startup sequence initiated...")

    if not TELEGRAM_TOKEN:
        logger.critical("TELEGRAM_TOKEN is not set. Bot cannot start.")
        raise RuntimeError("Telegram token missing.")

    try:
        ptb_app = await setup_bot_config()
        await ptb_app.initialize()
        bot_info = await ptb_app.bot.get_me()
        logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")

        # Remove any stale webhook left over from a previous deployment.
        current_webhook_info = await ptb_app.bot.get_webhook_info()
        if current_webhook_info and current_webhook_info.url:
            logger.info(f"Found existing webhook: {current_webhook_info.url}. Attempting to delete it.")
            try:
                if await ptb_app.bot.delete_webhook(drop_pending_updates=True):
                    logger.info("Existing webhook deleted successfully.")
                else:
                    logger.warning("Failed to delete existing webhook (API returned False).")
            except Exception as e:
                logger.warning(f"Could not delete existing webhook: {e}")
            await asyncio.sleep(1)

        space_host = os.environ.get("SPACE_HOST")
        webhook_path = "/webhook"
        full_webhook_url = None
        if space_host:
            protocol = "https://"  # HF Spaces always serve HTTPS
            host = space_host.split('://')[-1]  # tolerate SPACE_HOST with or without scheme
            full_webhook_url = f"{protocol}{host.rstrip('/')}{webhook_path}"

            if full_webhook_url:
                logger.info(f"Attempting to set webhook to: {full_webhook_url}")
                set_webhook_args = {
                    "url": full_webhook_url,
                    "allowed_updates": Update.ALL_TYPES,
                    "drop_pending_updates": True,
                }
                if WEBHOOK_SECRET:
                    set_webhook_args["secret_token"] = WEBHOOK_SECRET
                    logger.info("Webhook will be set with a secret token.")

                await asyncio.sleep(1.0)
                try:
                    await ptb_app.bot.set_webhook(**set_webhook_args)
                    webhook_info = await ptb_app.bot.get_webhook_info()
                    # Verify Telegram actually registered the URL we asked for.
                    if webhook_info.url == full_webhook_url:
                        logger.info(f"Webhook successfully set: URL='{webhook_info.url}', Pending={webhook_info.pending_update_count}, Secret={bool(WEBHOOK_SECRET)}")
                    else:
                        logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got '{webhook_info.url}'")
                        raise RuntimeError("Webhook URL mismatch after setting.")

                    await ptb_app.start()
                    logger.info("PTB Application started (webhook mode). Ready for updates.")
                except Exception as e:
                    logger.error(f"FATAL: Failed to set webhook to {full_webhook_url}: {e}", exc_info=True)
                    raise RuntimeError(f"Failed to set webhook: {e}") from e
            else:
                logger.critical("Could not construct valid HTTPS webhook URL from SPACE_HOST.")
                raise RuntimeError("Webhook URL could not be determined.")
        else:
            logger.critical("SPACE_HOST environment variable not found. Cannot set webhook for HF Space.")
            raise RuntimeError("SPACE_HOST env var missing, cannot run in webhook mode.")

        logger.info("ASGI Lifespan: Startup complete.")
        yield  # Application serves requests here
    except Exception as startup_err:
        logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
        if ptb_app:
            if ptb_app.running:
                await ptb_app.stop()
            await ptb_app.shutdown()
        raise
    finally:
        logger.info("ASGI Lifespan: Shutdown sequence initiated...")
        if ptb_app:
            if ptb_app.running:
                logger.info("Stopping PTB application...")
                await ptb_app.stop()
            logger.info("Shutting down PTB application...")
            await ptb_app.shutdown()  # closes the main bot's request client
            logger.info("PTB Application shut down gracefully.")
        else:
            logger.info("PTB application was not initialized or startup failed.")
        logger.info("ASGI Lifespan: Shutdown complete.")
 
917
-
918
- # --- Starlette Route Handlers ---
919
async def health_check(request: Request) -> PlainTextResponse:
    """Basic health check endpoint.

    Reports the bot's run state, the configured OpenRouter model, and the
    Apify actor (when an Apify token is available).

    Fix: the refactor renamed the actor config to ``APIFY_ACTOR_ID`` but this
    endpoint still referenced the old ``APIFY_ACTOR_NAME``, which would raise
    a NameError on every health-check request.
    """
    bot_status = "Not Initialized"
    if ptb_app and ptb_app.bot:
        try:
            if ptb_app.running:
                # NOTE(review): calling get_me per request is wasteful; caching
                # the bot info at startup would be preferable — TODO confirm.
                bot_info = await ptb_app.bot.get_me()
                bot_status = f"Running (@{bot_info.username})"
            else:
                bot_status = "Initialized but not running"
        except Exception as e:
            bot_status = f"Error checking status: {e}"
    return PlainTextResponse(
        f"Telegram Bot Summarizer - Status: {bot_status}\n"
        f"Model: {OPENROUTER_MODEL}\n"
        f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A'}"
    )
933
-
934
 
935
async def telegram_webhook(request: Request) -> Response:
    """Webhook endpoint called by Telegram.

    Validates readiness and the optional secret token, then feeds the update
    into PTB. Always answers 200 for application-level failures so Telegram
    does not retry updates that our own logic failed on.
    """
    global WEBHOOK_SECRET

    if not ptb_app:
        logger.error("Webhook received but PTB application not initialized.")
        return PlainTextResponse('Bot not initialized', status_code=503)
    if not ptb_app.running:
        logger.warning("Webhook received but PTB application not running.")
        return PlainTextResponse('Bot initialized but not running', status_code=503)

    try:
        # Reject requests that do not carry the configured secret token.
        if WEBHOOK_SECRET:
            token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
            if token_header != WEBHOOK_SECRET:
                logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'")
                return Response(content="Invalid secret token", status_code=403)

        payload = await request.json()
        update = Update.de_json(data=payload, bot=ptb_app.bot)
        logger.debug(f"Processing update_id: {update.update_id} via webhook")
        await ptb_app.process_update(update)
        return Response(status_code=200)
    except json.JSONDecodeError:
        logger.error("Webhook received invalid JSON.")
        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
    except Exception as e:
        # 200 on purpose: avoid Telegram retry storms for our own bugs.
        logger.error(f"Error processing webhook update: {e}", exc_info=True)
        return Response(status_code=200)
968
-
969
- # --- Create Starlette ASGI Application ---
970
# --- Create Starlette ASGI Application ---
_routes = [
    Route("/", endpoint=health_check, methods=["GET"]),
    Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
]
app = Starlette(debug=False, lifespan=lifespan, routes=_routes)
logger.info("Starlette ASGI application created with native routes.")
979
 
980
- # --- Development Server Execution Block (Optional) ---
981
if __name__ == '__main__':
    # Local development entry point only; production runs under an ASGI server.
    import uvicorn
    logger.warning("Running in development mode using Uvicorn directly (not for production)")
    dev_log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
    dev_port = int(os.environ.get('PORT', 8080))
    # reload=True is convenient for iteration; never enable in production.
    uvicorn.run("__main__:app", host='0.0.0.0', port=dev_port, log_level=dev_log_level, reload=True)
 
1
+ # main.py (Refactored to match Colab logic using httpx and ASGI structure)
2
  import os
3
  import re
4
  import logging
 
27
  )
28
  from telegram.constants import ParseMode
29
  from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError
30
+ from telegram.request import HTTPXRequest, BaseRequest
31
 
32
  # --- Other Libraries ---
33
+ import httpx # Use httpx for all async HTTP calls
34
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
 
35
  from bs4 import BeautifulSoup
36
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
 
37
  try:
38
  import lxml
39
  DEFAULT_PARSER = 'lxml'
40
  except ImportError:
41
  DEFAULT_PARSER = 'html.parser'
42
 
43
+ # NOTE: apify-client is NOT used, as we replicate the REST API call from Colab
 
 
 
 
 
 
 
 
 
44
 
45
  # --- Logging Setup ---
46
  logging.basicConfig(
 
48
  level=logging.INFO
49
  )
50
  logging.getLogger("httpx").setLevel(logging.WARNING)
51
+ # No apify_client logger needed
52
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
53
  logging.getLogger('telegram.bot').setLevel(logging.INFO)
54
  logging.getLogger("urllib3").setLevel(logging.INFO)
 
61
  # --- Global variable for PTB app ---
62
  ptb_app: Optional[Application] = None
63
 
64
+ # --- Environment Variable Loading & Configuration ---
65
+ logger.info("Attempting to load secrets and configuration...")
66
  def get_secret(secret_name):
67
  value = os.environ.get(secret_name)
 
68
  log_length = min(len(value), 8) if value else 0
69
  status = "Found" if value else "Not Found"
70
  logger.info(f"Secret '{secret_name}': {status} (Value starts with: {value[:log_length]}...)")
 
72
 
73
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
74
  OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
75
+ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # For urltotext.com API
76
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
77
+ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # For Apify REST API call
78
+ WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
79
+
80
+ # Configuration matching Colab script
81
+ OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
82
+ APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo/youtube-transcripts") # Actor used in Colab
83
+
84
+ # Check Essential Keys
85
+ if not TELEGRAM_TOKEN:
86
+ logger.critical("❌ FATAL: TELEGRAM_TOKEN not found in environment variables.")
87
+ raise RuntimeError("Exiting: Telegram token missing.")
88
+ if not OPENROUTER_API_KEY:
89
+ logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
90
+ # Allow running without summary capability? For now, we'll let it run but log error.
91
+ # raise RuntimeError("Exiting: OpenRouter key missing.")
92
+
93
+ # Log warnings for optional keys (used in fallbacks)
94
+ if not URLTOTEXT_API_KEY: logger.warning("⚠️ WARNING: URLTOTEXT_API_KEY not found. Fallback website scraping unavailable.")
95
+ if not SUPADATA_API_KEY: logger.warning("⚠️ WARNING: SUPADATA_API_KEY not found. First YT transcript fallback unavailable.")
96
+ if not APIFY_API_TOKEN: logger.warning("⚠️ WARNING: APIFY_API_TOKEN not found. Second YT transcript fallback unavailable.")
97
+
98
+ logger.info("Secret loading and configuration check finished.")
99
  logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
100
+ logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
 
101
 
102
+ _apify_token_exists = bool(APIFY_API_TOKEN) # Flag for conditional logic
103
 
104
+ # --- Retry Decorator (Unchanged) ---
105
@retry(
    stop=stop_after_attempt(4),
    wait=wait_exponential(multiplier=1, min=2, max=15),
    retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
    before_sleep=before_sleep_log(logger, logging.WARNING),
    reraise=True,
)
async def retry_bot_operation(func, *args, **kwargs):
    """Run a bot API coroutine with automatic retries on transient errors.

    Known-benign BadRequests (stale queries, unmodified messages, blocked
    users, ...) are swallowed and return None; everything else re-raises so
    tenacity can retry or propagate.
    """
    try:
        return await func(*args, **kwargs)
    except BadRequest as e:
        ignore_errors = [
            "message is not modified",
            "query is too old",
            "message to edit not found",
            "chat not found",
            "bot was blocked by the user",
        ]
        if any(err in str(e).lower() for err in ignore_errors):
            logger.warning(f"Ignoring non-critical BadRequest: {e}")
            return None
        logger.error(f"Potentially critical BadRequest: {e}")
        raise
    except TelegramError as e:
        logger.warning(f"TelegramError (will retry if applicable): {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error during bot operation: {e}", exc_info=True)
        raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # --- Helper Functions (Unchanged) ---
 
118
def is_youtube_url(url):
    """Return True if *url* matches a recognised YouTube video link pattern."""
    youtube_regex = re.compile(
        r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
        r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
        r'([\w-]{11})'
        r'(?:\S+)?',
        re.IGNORECASE,
    )
    match = youtube_regex.search(url)
    logger.debug(f"is_youtube_url '{url}': {bool(match)}")
    return bool(match)
 
 
 
 
 
 
 
 
 
121
def extract_youtube_id(url):
    """Extract the 11-character YouTube video ID from *url*, or None."""
    youtube_regex = re.compile(
        r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
        r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
        r'([\w-]{11})'
        r'(?:\S+)?',
        re.IGNORECASE,
    )
    match = youtube_regex.search(url)
    if not match:
        logger.warning(f"Could not extract YT ID from {url}")
        return None
    video_id = match.group(1)
    logger.debug(f"Extracted YT ID '{video_id}' from {url}")
    return video_id
126
+
127
+ # --- Content Fetching Functions (Reimplemented based on Colab logic using httpx) ---
128
+
129
+ # Generic fetcher used by website scraping (similar to previous version)
130
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
    """Fetch HTML from *url* with httpx for scraping; return text or None.

    Returns None on any network error, non-HTML content type, or decode
    failure — callers treat None as "primary scrape unavailable".
    """
    # Browser-like headers to reduce trivial bot blocking.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
            logger.debug(f"[Web Scrape] Sending request to {url}")
            response = await client.get(url)
            logger.debug(f"[Web Scrape] Received response {response.status_code} from {url}")
            response.raise_for_status()
            content_type = response.headers.get('content-type', '').lower()
            if 'html' not in content_type:
                logger.warning(f"[Web Scrape] Non-HTML content type from {url}: {content_type}")
                return None
            try:
                # httpx picks the encoding from headers/content.
                return response.text
            except Exception as e:
                logger.error(f"[Web Scrape] Error decoding response for {url}: {e}")
                return None
    except httpx.HTTPStatusError as e:
        logger.error(f"[Web Scrape] HTTP error {e.response.status_code} fetching {url}: {e}")
    except httpx.TimeoutException:
        logger.error(f"[Web Scrape] Timeout error fetching {url}")
    except httpx.TooManyRedirects:
        logger.error(f"[Web Scrape] Too many redirects fetching {url}")
    except httpx.RequestError as e:
        logger.error(f"[Web Scrape] Request error fetching {url}: {e}")
    except Exception as e:
        logger.error(f"[Web Scrape] Unexpected error fetching {url}: {e}", exc_info=True)
    return None
155
 
156
+ # --- YT Transcript Fetching ---
157
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
    """Fetch a YouTube transcript via the Supadata API; return text or None.

    Accepts either a JSON body (string, or dict with transcript/text/data
    keys) or a plain-text body. Auth failures and 404s return None without
    raising.
    """
    if not video_id:
        logger.error("[Supadata] No video_id provided")
        return None
    if not api_key:
        logger.error("[Supadata] API key missing.")
        return None
    logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")

    api_endpoint = "https://api.supadata.net/v1/youtube/transcript"
    params = {"videoId": video_id, "format": "text"}
    headers = {"X-API-Key": api_key}
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(api_endpoint, headers=headers, params=params)
            logger.debug(f"[Supadata] Status code {response.status_code} for {video_id}")
            if response.status_code == 200:
                try:
                    # Body may be JSON or plain text; try JSON first.
                    try:
                        data = response.json()
                    except json.JSONDecodeError:
                        data = None
                    content = None
                    if data:
                        content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
                    if not content and response.text:
                        content = response.text
                    if content and isinstance(content, str):
                        logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}")
                        return content.strip()
                    logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}")
                    return None
                except Exception as e:
                    logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True)
                    return None
            if response.status_code in [401, 403]:
                # Credentials problem — retrying will not help.
                logger.error(f"[Supadata] Auth error ({response.status_code}). Check API key.")
                return None
            if response.status_code == 404:
                logger.warning(f"[Supadata] Not found (404) for {video_id}.")
                return None
            logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}")
            return None
    except httpx.TimeoutException:
        logger.error(f"[Supadata] Timeout connecting for {video_id}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[Supadata] Request error for {video_id}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True)
        return None
189
+
190
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
    """Fetch a YouTube transcript via Apify's synchronous REST endpoint.

    Runs the configured actor synchronously and parses the first dataset
    item, accepting several output field names. Returns the transcript text
    or None on any failure.
    """
    global APIFY_ACTOR_ID
    if not video_url:
        logger.error("[Apify] No video_url provided")
        return None
    if not api_token:
        logger.error("[Apify] API token missing.")
        return None
    logger.info(f"[Apify] Attempting fetch via REST for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")

    api_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
    params = {"token": api_token}
    payload = {
        "urls": [video_url],
        "outputFormat": "singleStringText",
        "maxRetries": 5,
        "channelHandleBoolean": False,
        "channelNameBoolean": False,
        "datePublishedBoolean": False,
        "relativeDateTextBoolean": False,
    }
    headers = {"Content-Type": "application/json"}
    try:
        # Synchronous actor runs can take a while — use a long timeout.
        async with httpx.AsyncClient(timeout=90.0) as client:
            logger.debug(f"[Apify] Sending request to run actor {APIFY_ACTOR_ID} synchronously for {video_url}")
            response = await client.post(api_endpoint, headers=headers, params=params, json=payload)
            logger.debug(f"[Apify] Received status code {response.status_code} for {video_url}")
            if response.status_code == 200:
                try:
                    results = response.json()
                    if isinstance(results, list) and len(results) > 0:
                        item = results[0]
                        # Different actor versions expose the transcript under
                        # different keys; try them in order.
                        content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
                        if not content and item.get("captions") and isinstance(item["captions"], list):
                            logger.info("[Apify] Processing 'captions' format.")
                            content = " ".join(cap.get("text", "") for cap in item["captions"] if cap.get("text"))
                        if content and isinstance(content, str):
                            logger.info(f"[Apify] Success via REST for {video_url}. Length: {len(content)}")
                            return content.strip()
                        logger.warning(f"[Apify] Actor success but transcript empty/not found for {video_url}. Item: {item}")
                        return None
                    logger.warning(f"[Apify] Actor success but dataset empty for {video_url}. Response: {results}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[Apify] Failed JSON decode for {video_url}. Status:{response.status_code}. Resp:{response.text[:200]}")
                    return None
                except Exception as e:
                    logger.error(f"[Apify] Error processing success response for {video_url}: {e}", exc_info=True)
                    return None
            if response.status_code == 400:
                logger.error(f"[Apify] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}")
                return None
            if response.status_code == 401:
                # Credentials problem — retrying will not help.
                logger.error("[Apify] Auth error (401). Check token.")
                return None
            logger.error(f"[Apify] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}")
            return None
    except httpx.TimeoutException:
        logger.error(f"[Apify] Timeout running actor for {video_url}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[Apify] Request error running actor for {video_url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Apify] Unexpected error during Apify REST call for {video_url}: {e}", exc_info=True)
        return None
235
+
236
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
    """Fetch a YouTube transcript, trying each source in order.

    Order: youtube-transcript-api (primary), then Supadata, then Apify REST.
    Returns the first non-empty transcript, or None when every source fails
    or its API key/token is unavailable.
    """
    global SUPADATA_API_KEY, APIFY_API_TOKEN
    if not video_id:
        logger.error("get_youtube_transcript: No video_id")
        return None
    logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
    transcript_text = None

    # 1. Primary: youtube-transcript-api (blocking lib, run in a thread)
    logger.info("[Primary YT] Attempting youtube-transcript-api...")
    try:
        transcript_list = await asyncio.to_thread(
            YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US']
        )
        if transcript_list:
            transcript_text = " ".join([item['text'] for item in transcript_list if 'text' in item])
        if transcript_text:
            logger.info(f"[Primary YT] Success via lib for {video_id} (len: {len(transcript_text)})")
            return transcript_text
        logger.warning(f"[Primary YT] Transcript list/text empty for {video_id}")
        transcript_text = None
    except Exception as e:
        logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}")
        if "No transcript found" in str(e):
            logger.warning(f"[Primary YT] No transcript found.")
        elif "disabled" in str(e):
            logger.warning(f"[Primary YT] Transcripts disabled.")
        transcript_text = None

    # 2. Fallback 1: Supadata API
    if transcript_text is None:
        logger.info("[Fallback YT 1] Trying Supadata API...")
        if SUPADATA_API_KEY:
            transcript_text = await get_transcript_via_supadata(video_id, SUPADATA_API_KEY)
            if transcript_text:
                logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}")
                return transcript_text
            logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
        else:
            logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")

    # 3. Fallback 2: Apify REST API
    if transcript_text is None:
        logger.info("[Fallback YT 2] Trying Apify REST API...")
        if APIFY_API_TOKEN:
            transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
            if transcript_text:
                logger.info(f"[Fallback YT 2] Success via Apify REST for {video_url}")
                return transcript_text
            logger.warning(f"[Fallback YT 2] Apify REST failed or no content for {video_url}.")
        else:
            logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")

    if transcript_text is None:
        logger.error(f"All methods failed for YT transcript: {video_id}")
        return None
    return transcript_text
276
+
277
+ # --- Website Content Fetching ---
278
async def get_website_content(url: str) -> Optional[str]:
    """Primary website extractor: fetch HTML, strip chrome, return plain text.

    Fetches via fetch_url_content_for_scrape, then parses with BeautifulSoup
    in a worker thread (parsing is CPU-bound). Returns None when fetching,
    parsing, or text extraction yields nothing.
    """
    if not url:
        logger.error("get_website_content: No URL")
        return None
    logger.info(f"[Primary Web] Fetching website content for: {url}")
    html_content = await fetch_url_content_for_scrape(url)
    if not html_content:
        return None
    try:
        def parse_html(content):
            # Use lxml if available, otherwise html.parser
            soup = BeautifulSoup(content, DEFAULT_PARSER)
            # Drop scripts, styling, navigation and other non-content markup.
            for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]):
                element.extract()
            # Prefer an explicit main-content container; fall back to <body>.
            main_content = (
                soup.find('main') or soup.find('article') or soup.find(id='content')
                or soup.find(class_='content') or soup.find(id='main-content')
                or soup.find(class_='main-content') or soup.find(role='main')
            )
            target_element = main_content if main_content else soup.body
            if not target_element:
                logger.warning(f"[Primary Web] Could not find body/main for parsing {url}")
                return None
            lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
            text = " ".join(lines)
            if not text:
                logger.warning(f"[Primary Web] Extracted text empty after clean for {url}")
                return None
            return text

        text_content = await asyncio.to_thread(parse_html, html_content)
        if text_content:
            logger.info(f"[Primary Web] Success scrape for {url} (final len: {len(text_content)})")
            return text_content
        return None
    except Exception as e:
        logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True)
        return None
304
+
305
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
    """Fallback website extractor via the urltotext.com REST API.

    Posts the URL (with JavaScript rendering enabled) and returns the
    extracted main content, or None on any validation, HTTP, or parsing
    failure.
    """
    if not url:
        logger.error("[Fallback Web API] No URL")
        return None
    if not api_key:
        logger.error("[Fallback Web API] urltotext.com API key missing.")
        return None
    logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")

    api_endpoint = "https://urltotext.com/api/v1/urltotext/"
    request_body = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
    request_headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" } # Note: Token auth

    try:
        # Generous timeout because the service may render JavaScript server-side.
        async with httpx.AsyncClient(timeout=45.0) as client:
            logger.debug(f"[Fallback Web API] Sending request to urltotext.com API for {url}")
            response = await client.post(api_endpoint, headers=request_headers, json=request_body)
            logger.debug(f"[Fallback Web API] Received status {response.status_code} from urltotext.com API for {url}")

            if response.status_code == 200:
                try:
                    data = response.json()
                    content = data.get("data", {}).get("content")
                    credits = data.get("credits_used", "N/A")
                    warning = data.get("data", {}).get("warning")
                    if warning:
                        logger.warning(f"[Fallback Web API] urltotext.com API Warning for {url}: {warning}")
                    if content:
                        logger.info(f"[Fallback Web API] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}")
                        return content.strip()
                    logger.warning(f"[Fallback Web API] urltotext.com API success but content empty for {url}. Resp: {data}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[Fallback Web API] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}")
                    return None
                except Exception as e:
                    logger.error(f"[Fallback Web API] Error processing urltotext.com success response for {url}: {e}", exc_info=True)
                    return None

            # Non-200: distinguish documented error codes from anything unexpected.
            if response.status_code in [400, 401, 402, 403, 422, 500]:
                logger.error(f"[Fallback Web API] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
            else:
                logger.error(f"[Fallback Web API] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
            return None
    except httpx.TimeoutException:
        logger.error(f"[Fallback Web API] Timeout connecting to urltotext.com API for {url}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True)
        return None
336
+
337
+ # --- Summarization Function (Using DeepSeek via OpenRouter - Colab logic) ---
338
async def generate_summary(text: str, summary_type: str) -> str:
    """Generates a summary of `text` via the OpenRouter chat-completions API.

    Args:
        text: The raw content to summarise (truncated to 500k chars).
        summary_type: "paragraph" for a single-paragraph summary; anything
            else selects the bullet-point Markdown format.

    Returns:
        The summary string on success, or a user-facing message starting
        with "Error:" or "Sorry," on any failure. Callers rely on those
        prefixes to detect failure, so do not change them.
    """
    global OPENROUTER_API_KEY, OPENROUTER_MODEL # Use globally loaded config
    logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
    if not OPENROUTER_API_KEY: logger.error("OpenRouter key missing for generate_summary."); return "Error: AI model configuration key missing."

    # Prompts from Colab script. NOTE(review): a few lines below start with a
    # plain space instead of the "•" bullet — looks like a transcription slip
    # in the original prompt; preserved as-is since the bytes are the contract.
    if summary_type == "paragraph":
        prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
                  "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
                  "• Uses British English spellings throughout.\n"
                  "• Straightforward and understandable vocabulary; avoid complex terms.\n"
                  " Presented as ONE SINGLE PARAGRAPH.\n"
                  "• No more than 85 words maximum; but does not have to be exactly 85.\n"
                  "• Considers the entire text content equally.\n"
                  "• Uses semicolons (;) instead of em dashes (– or —).\n\n"
                  "Here is the text to summarise:")
    else: # points
        prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n"
                  "• For each distinct topic or section identified in the text, create a heading.\n"
                  "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n"
                  "• Immediately following each heading, list the key points as a bulleted list.\n"
                  "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n"
                  "• The text within each bullet point should NOT contain any bold formatting.\n"
                  "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n"
                  "• Use British English spellings throughout.\n"
                  " Avoid overly complex or advanced vocabulary.\n"
                  "• Keep bullet points concise.\n"
                  "• Ensure the entire summary takes no more than two minutes to read.\n"
                  " Consider the entire text's content, not just the beginning or a few topics.\n"
                  "• Use semicolons (;) instead of em dashes (– or —).\n\n"
                  "Here is the text to summarise:")

    # Limit input length (Colab script used 500k, adjust if needed)
    MAX_INPUT_LENGTH = 500000
    if len(text) > MAX_INPUT_LENGTH:
        logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating.")
        text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
    full_prompt = f"{prompt}\n\n{text}"

    headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }
    payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }
    openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"

    try:
        async with httpx.AsyncClient(timeout=60.0) as client: # Timeout from Colab
            logger.debug(f"Sending request to OpenRouter ({OPENROUTER_MODEL})...")
            response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
            logger.debug(f"Received status {response.status_code} from OpenRouter.")
            if response.status_code == 200:
                # Defensive unpacking of choices[0].message.content; each level
                # has its own distinguishable user-facing error message.
                try:
                    data = response.json()
                    if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
                        message = data["choices"][0].get("message")
                        if message and isinstance(message, dict):
                            summary = message.get("content")
                            if summary: logger.info(f"Success generating summary via OpenRouter. Output len: {len(summary)}"); return summary.strip()
                            else: logger.warning(f"OpenRouter success but content empty. Resp: {data}"); return "Sorry, the AI model returned an empty summary."
                        else: logger.error(f"Unexpected message structure in OpenRouter resp: {message}. Full: {data}"); return "Sorry, could not parse AI response (format)."
                    else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
                except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
                except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
            # Error handling matching Colab script
            elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
            elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
            elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
            elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
            else: logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp:{response.text[:500]}"); return f"Sorry, AI service returned unexpected status ({response.status_code})."
    except httpx.TimeoutException: logger.error("Timeout connecting to OpenRouter API."); return "Sorry, request to AI model timed out."
    except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, error connecting to AI service."
    except Exception as e: logger.error(f"Unexpected error in generate_summary (OpenRouter): {e}", exc_info=True); return "Sorry, unexpected error generating summary."
409
+
410
+ # --- Background Task Processing (Orchestrates new fetch/summary functions) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
async def process_summary_task(
    user_id: int, chat_id: int, message_id_to_edit: Optional[int],
    url: str, summary_type: str, bot_token: str # bot_token needed to create instance
) -> None:
    """Background task: fetch content for `url`, summarise it, and deliver the
    result to `chat_id`, editing/deleting the status message along the way.

    Runs detached from the PTB request cycle, so it builds its own `Bot`
    instance (with relaxed timeouts) from `bot_token` rather than reusing the
    application's bot. All failures are reported to the user; nothing raises
    out of this coroutine except a failure to even send a status message.
    """
    task_id = f"{user_id}-{message_id_to_edit or 'new'}"
    logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
    background_request: Optional[BaseRequest] = None
    bot: Optional[Bot] = None
    try: # Create background bot instance
        # Longer read/write timeouts than the webhook bot: summaries can be slow.
        background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 )
        bot = Bot(token=bot_token, request=background_request)
    except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return

    content = None; user_feedback_message = None; success = False
    status_message_id = message_id_to_edit
    message_to_delete_later_id : Optional[int] = None # Track ID of new status message

    try:
        # --- Inform User Processing Started ---
        processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
        if status_message_id:
            # Prefer editing the original button message into a status message.
            try:
                await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id,
                    text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None )
                logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
            except Exception as e:
                logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Sending new.")
                status_message_id = None # Will trigger sending new message
        if not status_message_id: # Send new status message if needed
            try:
                status_message = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN )
                if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
                else: raise RuntimeError("Failed to send status message after retries.")
            except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise # Stop if we can't inform user

        # --- Main Fetching & Summarization ---
        try:
            await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
            is_youtube = is_youtube_url(url)
            logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")

            if is_youtube:
                video_id = extract_youtube_id(url)
                if video_id: content = await get_youtube_transcript(video_id, url) # Calls new func with fallbacks
                else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
                if not content and not user_feedback_message: # Set default fail message if get_youtube_transcript returned None
                    user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
            else: # Website
                content = await get_website_content(url) # Calls new primary func
                if not content: # Try fallback
                    logger.warning(f"[Task {task_id}] Primary web scrape failed for {url}. Trying fallback API.")
                    global URLTOTEXT_API_KEY # Access key
                    if URLTOTEXT_API_KEY:
                        await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
                        content = await get_website_content_via_api(url, URLTOTEXT_API_KEY) # Calls new fallback func
                        if not content: user_feedback_message = "Sorry, I couldn't fetch content from that website using either method (blocked/inaccessible/empty?)."
                    else:
                        user_feedback_message = "Sorry, I couldn't fetch content from that website (blocked/inaccessible/empty?). The fallback method is not configured."

            # --- Generate Summary ---
            if content:
                logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
                await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
                # Use new generate_summary function (keys accessed globally within it)
                final_summary = await generate_summary(content, summary_type)
                # generate_summary signals failure via these message prefixes.
                if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
                    user_feedback_message = final_summary # Use error from summary func
                    logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
                else:
                    # Success! Send summary, split to respect Telegram's 4096-char cap.
                    max_length = 4096
                    summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
                    # Send first/only part
                    await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0],
                        parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
                    # Send subsequent parts, with a small delay to stay polite to the API
                    for part in summary_parts[1:]: await asyncio.sleep(0.5); await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=ParseMode.MARKDOWN, link_preview_options={'is_disabled': True} )
                    success = True; logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
                    user_feedback_message = None # Clear any previous error

            # --- Send Feedback if Fetching or Summary Failed ---
            elif user_feedback_message: # Only send if content failed AND message exists
                logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}")
                await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )

        except Exception as e:
            logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
            user_feedback_message = "Oops! Something went really wrong while processing your request. Please try again later."
            # Ensure we send this feedback if an unexpected exception occurs
            try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
            except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")

    except Exception as outer_e: # Catch critical errors like failing to send status message
        logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
        try:
            if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred." )
        except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
    finally:
        # --- Cleanup ---
        # Delete whichever status message exists: the freshly-sent one wins
        # over the original button message.
        delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
        if delete_target_id and bot:
            try:
                # Delete the original button message OR the status message we sent
                await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id)
                logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
            except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
        # Close background bot's httpx client. NOTE(review): reaches into the
        # private `_client` attribute of HTTPXRequest — may break on PTB upgrades.
        if background_request and hasattr(background_request, '_client') and background_request._client:
            try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
            except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
        logger.info(f"[Task {task_id}] Task completed. Success: {success}")
523
 
524
 
525
+ # --- Telegram Bot Handlers (Unchanged structure, Colab text/logic adjusted) ---
526
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles /start: greets the user with an HTML mention.

    Fix: the original computed `user.mention_html()` *before* the None
    guard, so an update without an effective user raised AttributeError.
    The mention is now built only after validation.
    """
    user = update.effective_user
    if not user or not update.message: return
    mention = user.mention_html()
    logger.info(f"User {user.id} used /start.")
    await update.message.reply_html( f"👋 Hello {mention}! I can summarize YouTube links or website URLs.\n\nJust send me a link anytime!" )
 
 
 
 
 
 
 
531
 
532
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Replies to /help with usage instructions (Markdown-formatted)."""
    sender = update.effective_user
    if sender is None or update.message is None:
        return
    logger.info(f"User {sender.id} used /help.")
    # Help text from Colab script
    help_text = ( "🔍 How to use this bot:\n\n"
                  "1. Send me any YouTube video link or website URL.\n"
                  "2. I'll ask you how you want it summarized (paragraph or points).\n"
                  "3. Click the button for your choice.\n"
                  "4. Wait for the summary!\n\n"
                  "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n"
                  "Commands:\n"
                  "`/start` - Display welcome message\n"
                  "`/help` - Show this help message" )
    await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
547
 
548
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Stores a plausible URL from the user and offers summary-type buttons."""
    if not update.message or not update.message.text:
        return
    candidate = update.message.text.strip()
    sender = update.effective_user
    if sender is None:
        return
    # Very light validation (same rule as the Colab script): must be http(s)
    # and contain a dot somewhere past the scheme prefix.
    looks_like_url = candidate.startswith(('http://', 'https://')) and '.' in candidate[8:]
    if not looks_like_url:
        logger.debug(f"Ignoring non-URL from {sender.id}: {candidate}")
        return
    logger.info(f"User {sender.id} sent potential URL: {candidate}")
    # Stash the URL so the button callback can retrieve it.
    context.user_data['url_to_summarize'] = candidate
    context.user_data['original_message_id'] = update.message.message_id  # Still useful potentially
    buttons = [[
        InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"),
        InlineKeyboardButton("Points Summary", callback_data="points"),
    ]]
    await update.message.reply_text(
        f"Okay, I see this link:\n{candidate}\n\nHow would you like it summarized?",
        reply_markup=InlineKeyboardMarkup(buttons), disable_web_page_preview=True )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
 
565
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles a summary-type button press, then schedules the background task.

    Fix: the original placed `try:` after a semicolon inside one-line
    `except`/`if` clauses (the "URL not found" fallback and both config-key
    checks), which is a SyntaxError in Python. Those paths are rewritten as
    proper multi-line try/except blocks; all messages and control flow are
    otherwise unchanged.
    """
    query = update.callback_query
    if not query or not query.message or not query.from_user:
        logger.warning("Callback query missing data.")
        return
    user = query.from_user
    summary_type = query.data
    query_id = query.id
    try:
        # Acknowledge promptly so the client stops showing the spinner.
        await query.answer()
        logger.debug(f"Ack callback {query_id} from {user.id}")
    except Exception as e:
        logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)

    url = context.user_data.get('url_to_summarize')
    message_id_to_edit = query.message.message_id  # Use the message with the buttons
    logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")

    if not url:
        logger.warning(f"No URL in context for user {user.id} (cb {query_id}).")
        try:
            await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
        except Exception as e:
            logger.error(f"Failed edit 'URL not found' msg: {e}")
            try:
                await context.bot.send_message(chat_id=user.id, text="Sorry, context lost. Send link again.")
            except Exception:
                pass
        return

    # Clear context *before* starting task to prevent race conditions if user clicks fast
    context.user_data.pop('url_to_summarize', None)
    context.user_data.pop('original_message_id', None)
    logger.debug(f"Cleared URL context for user {user.id}")

    # Check essential keys needed for the task *before* scheduling
    global TELEGRAM_TOKEN, OPENROUTER_API_KEY
    if not TELEGRAM_TOKEN:
        logger.critical("TG TOKEN missing!")
        try:
            await query.edit_message_text(text="❌ Bot config error.")
        except Exception:
            pass
        return
    if not OPENROUTER_API_KEY:
        logger.error("OpenRouter key missing!")
        try:
            await query.edit_message_text(text="❌ AI config error.")
        except Exception:
            pass
        return

    logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
    # Pass the bot token to the background task so it can create its own instance
    asyncio.create_task(
        process_summary_task( user_id=user.id, chat_id=query.message.chat_id, message_id_to_edit=message_id_to_edit,
            url=url, summary_type=summary_type, bot_token=TELEGRAM_TOKEN ),
        name=f"SummaryTask-{user.id}-{message_id_to_edit}" )
596
 
597
+ # --- Error Handler, Bot Setup, Lifespan, Routes (Largely Unchanged, Ensure Keys Read) ---
598
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Log Errors caused by Updates."""
    err = context.error
    # Certain AttributeErrors are raised and handled during cleanup elsewhere;
    # suppress those to keep the error log meaningful.
    is_benign = isinstance(err, AttributeError) and "object has no attribute" in str(err)
    if is_benign:
        logger.debug(f"Ignoring known/handled error in error_handler: {err}")
        return
    logger.error("Exception while handling an update:", exc_info=err)
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
async def setup_bot_config() -> Application:
    """Builds the PTB Application and registers all command/message handlers."""
    logger.info("Configuring Telegram Application...")
    global TELEGRAM_TOKEN # Ensure global token is accessible
    if not TELEGRAM_TOKEN:
        raise ValueError("TELEGRAM_TOKEN missing.")

    # Custom HTTP timeouts for the webhook-serving bot instance.
    request_cfg = HTTPXRequest(connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0)
    app_instance = Application.builder().token(TELEGRAM_TOKEN).request(request_cfg).build()

    for handler in (
        CommandHandler("start", start),
        CommandHandler("help", help_command),
        MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url),
        CallbackQueryHandler(handle_summary_type_callback),
    ):
        app_instance.add_handler(handler)
    app_instance.add_error_handler(error_handler)

    logger.info("Telegram application handlers configured.")
    return app_instance
621
 
 
622
@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
    """Handles PTB startup and shutdown during ASGI lifespan.

    Startup: builds/initialises the PTB application, deletes any stale
    webhook, constructs the webhook URL from SPACE_HOST, registers it
    (with optional secret), and starts PTB. Any failure raises so the
    ASGI server refuses to start. Shutdown always stops and shuts down
    PTB if it was created.
    """
    global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN # Access globals
    logger.info("ASGI Lifespan: Startup initiated...")
    # Essential key check already happened globally, but double-check token
    if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
    try:
        ptb_app = await setup_bot_config()
        await ptb_app.initialize()
        bot_info = await ptb_app.bot.get_me()
        logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
        # --- Webhook setup (unchanged from previous version) ---
        # Remove any previously-registered webhook (and queued updates) first.
        current_webhook_info = await ptb_app.bot.get_webhook_info()
        if current_webhook_info and current_webhook_info.url:
            logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
            try:
                if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
                else: logger.warning("Failed delete webhook (API returned False).")
            except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
        # Derive the public webhook URL from the hosting platform's SPACE_HOST.
        space_host = os.environ.get("SPACE_HOST")
        webhook_path = "/webhook"; full_webhook_url = None
        if space_host:
            protocol = "https://"; host = space_host.split('://')[-1]
            full_webhook_url = f"{protocol}{host.rstrip('/')}{webhook_path}"
            if full_webhook_url:
                logger.info(f"Setting webhook: {full_webhook_url}")
                set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
                if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
                await asyncio.sleep(1.0)
                try:
                    await ptb_app.bot.set_webhook(**set_webhook_args)
                    # Read back and verify Telegram accepted the exact URL.
                    webhook_info = await ptb_app.bot.get_webhook_info()
                    if webhook_info.url == full_webhook_url: logger.info(f"Webhook set: URL='{webhook_info.url}', Secret={bool(WEBHOOK_SECRET)}")
                    else: logger.error(f"Webhook URL mismatch! Expected '{full_webhook_url}', Got '{webhook_info.url}'"); raise RuntimeError("Webhook URL mismatch.")
                    await ptb_app.start()
                    logger.info("PTB Application started (webhook mode).")
                except Exception as e: logger.error(f"FATAL: Failed set webhook: {e}", exc_info=True); raise RuntimeError(f"Failed set webhook: {e}") from e
            else: logger.critical("Could not construct webhook URL."); raise RuntimeError("Webhook URL undetermined.")
        else: logger.critical("SPACE_HOST missing."); raise RuntimeError("SPACE_HOST env var missing.")
        # --- End Webhook Setup ---
        logger.info("ASGI Lifespan: Startup complete.")
        yield # App runs
    except Exception as startup_err:
        logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
        # Best-effort teardown of a partially-started PTB app before re-raising.
        if ptb_app:
            if ptb_app.running: await ptb_app.stop()
            await ptb_app.shutdown()
        raise
    finally: # Shutdown
        logger.info("ASGI Lifespan: Shutdown initiated...")
        if ptb_app:
            if ptb_app.running: logger.info("Stopping PTB..."); await ptb_app.stop()
            logger.info("Shutting down PTB..."); await ptb_app.shutdown()
            logger.info("PTB Application shut down.")
        else: logger.info("PTB application not initialized or failed.")
        logger.info("ASGI Lifespan: Shutdown complete.")
679
 
 
 
680
async def health_check(request: Request) -> PlainTextResponse:
    """Health endpoint: reports bot state, configured model, and Apify actor."""
    global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists
    bot_status = "Not Initialized"
    if ptb_app and ptb_app.bot:
        try:
            if ptb_app.running:
                bot_info = await ptb_app.bot.get_me()
                bot_status = f"Running (@{bot_info.username})"
            else:
                bot_status = "Initialized/Not running"
        except Exception as e:
            bot_status = f"Error checking status: {e}"
    # Include model/actor info in health check
    apify_label = APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'
    return PlainTextResponse(
        f"TG Bot Summarizer - Status: {bot_status}\nModel: {OPENROUTER_MODEL}\nApify Actor: {apify_label}"
    )
 
 
 
 
 
691
 
692
async def telegram_webhook(request: Request) -> Response:
    """Webhook endpoint called by Telegram."""
    global WEBHOOK_SECRET # Access global
    if not ptb_app:
        logger.error("Webhook recv but PTB not initialized.")
        return PlainTextResponse('Bot not initialized', status_code=503)
    if not ptb_app.running:
        logger.warning("Webhook recv but PTB not running.")
        return PlainTextResponse('Bot not running', status_code=503)
    try:
        # Verify the shared secret header when one is configured.
        if WEBHOOK_SECRET:
            provided_secret = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
            if provided_secret != WEBHOOK_SECRET:
                logger.warning(f"Webhook invalid secret. Header: '{provided_secret}'")
                return Response(content="Invalid secret token", status_code=403)
        payload = await request.json()
        tg_update = Update.de_json(data=payload, bot=ptb_app.bot)
        logger.debug(f"Processing update_id: {tg_update.update_id} via webhook")
        await ptb_app.process_update(tg_update)
        return Response(status_code=200) # OK
    except json.JSONDecodeError:
        logger.error("Webhook invalid JSON.")
        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
    except Exception as e:
        logger.error(f"Error processing webhook update: {e}", exc_info=True)
        # Still 200 so Telegram doesn't endlessly retry a poison update.
        return Response(status_code=200)
709
 
710
# --- Create Starlette ASGI Application (Unchanged) ---
# Two native routes: "/" serves platform health probes; "/webhook" receives
# Telegram updates. The `lifespan` context manager starts/stops PTB with the
# server process.
app = Starlette( debug=False, lifespan=lifespan, routes=[
    Route("/", endpoint=health_check, methods=["GET"]),
    Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )

logger.info("Starlette ASGI application created with native routes.")
715
 
716
# --- Development Server Block (Unchanged) ---
# Local/dev entry point only; in production the ASGI server imports `app`
# directly and this block never runs.
if __name__ == '__main__':
    import uvicorn
    logger.warning("Running in development mode using Uvicorn directly")
    # LOGGING_LEVEL and PORT come from the environment, with sane defaults.
    log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
    local_port = int(os.environ.get('PORT', 8080))
    # String import target ("__main__:app") is required for reload=True.
    uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)