fmab777 committed on
Commit
cd72c3f
·
verified ·
1 Parent(s): 2e112ba

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +231 -777
main.py CHANGED
@@ -1,21 +1,22 @@
1
- # main.py (Revised: Use asyncio.create_task for callback processing)
2
  import os
3
  import re
4
  import logging
5
  import asyncio
6
  import json
7
- import html # For unescaping HTML entities
8
- import contextlib # For async context manager (lifespan)
9
- import traceback # For logging exceptions in tasks
 
10
 
11
  # --- Frameworks ---
12
- from flask import Flask, request, Response # Core web routes
13
- from starlette.applications import Starlette # ASGI App & Lifespan
14
- from starlette.routing import Mount # Mount Flask within Starlette
15
- from starlette.middleware.wsgi import WSGIMiddleware # Wrap Flask for Starlette
16
 
17
  # --- Telegram Bot ---
18
- from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot # Import Bot
19
  from telegram.ext import (
20
  Application,
21
  CommandHandler,
@@ -25,7 +26,7 @@ from telegram.ext import (
25
  CallbackQueryHandler,
26
  )
27
  from telegram.constants import ParseMode
28
- from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest # Import BadRequest
29
  from telegram.request import HTTPXRequest
30
 
31
  # --- Other Libraries ---
@@ -33,6 +34,8 @@ import httpx
33
  from youtube_transcript_api import YouTubeTranscriptApi
34
  import requests
35
  from bs4 import BeautifulSoup
 
 
36
  _apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
37
  if _apify_token_exists:
38
  from apify_client import ApifyClient
@@ -55,8 +58,8 @@ logging.getLogger('starlette').setLevel(logging.INFO)
55
  logger = logging.getLogger(__name__)
56
  logger.info("Logging configured.")
57
 
58
- # --- Global variable for PTB app (initialized during lifespan) ---
59
- ptb_app: Application | None = None
60
 
61
  # --- Environment Variable Loading ---
62
  logger.info("Attempting to load secrets...")
@@ -73,14 +76,26 @@ SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
73
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
74
  logger.info("Secret loading attempt finished.")
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- # --- Bot Logic Functions ---
78
- # (Keep ALL your functions: is_youtube_url, extract_youtube_id,
79
- # get_transcript_via_supadata, get_transcript_via_apify,
80
- # get_youtube_transcript, get_website_content_via_requests,
81
- # get_website_content_via_urltotext_api, generate_summary - unchanged)
82
-
83
- # Helper Functions
84
  def is_youtube_url(url):
85
  """Checks if the URL is a valid YouTube video or shorts URL."""
86
  youtube_regex = r'(https?://)?(www\.)?(youtube\.com/(watch\?v=|shorts/)|youtu\.be/)([\w-]{11})'
@@ -100,608 +115,166 @@ def extract_youtube_id(url):
100
  logger.warning(f"Could not extract YouTube ID from URL: {url}")
101
  return None
102
 
103
# Supadata Transcript Fetching
async def get_transcript_via_supadata(video_id: str, api_key: str):
    """Fetch a YouTube transcript through the Supadata HTTP API.

    The blocking ``requests.get`` call is executed with ``asyncio.to_thread``
    so the event loop is not blocked while waiting for the response.

    Args:
        video_id: YouTube video ID, sent as the ``videoId`` query parameter.
        api_key: Supadata API key, sent in the ``X-API-Key`` header.

    Returns:
        The stripped, non-empty transcript text, or ``None`` on any failure
        (missing argument, non-200 status, empty/invalid payload, network
        error). Failures are logged and never raised.
    """
    if not video_id:
        logger.error("[Supadata] get_transcript_via_supadata called with no video_id")
        return None
    if not api_key:
        logger.error("[Supadata] API key is missing.")
        return None

    logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
    api_endpoint = "https://api.supadata.net/v1/youtube/transcript"
    params = {"videoId": video_id, "format": "text"}
    headers = {"X-API-Key": api_key}

    try:
        # SECURITY NOTE(review): verify=False disables TLS certificate checks,
        # so the API key in the header is exposed to man-in-the-middle
        # interception. Kept only to preserve existing behaviour; re-enable
        # verification once the endpoint's certificate chain validates.
        logger.warning("[Supadata] Making request with verify=False (Attempting to bypass SSL verification - Potential Security Risk)")
        response = await asyncio.to_thread(
            requests.get,
            api_endpoint,
            headers=headers,
            params=params,
            timeout=30,
            verify=False,
        )
        logger.debug(f"[Supadata] Received status code {response.status_code} for {video_id}")

        if response.status_code == 200:
            try:
                data = response.json()
                if isinstance(data, str):
                    content = data
                elif isinstance(data, dict):
                    content = data.get("transcript") or data.get("text") or data.get("data")
                else:
                    # e.g. a JSON list or null: nothing usable to extract.
                    content = None

                cleaned = content.strip() if isinstance(content, str) else ""
                if cleaned:
                    logger.info(f"[Supadata] Successfully fetched transcript for {video_id}. Length: {len(content)}")
                    return cleaned
                logger.warning(f"[Supadata] API success but content empty/invalid for {video_id}. Response: {data}")
                return None
            except ValueError:
                # requests raises a JSONDecodeError that derives from either
                # json's or simplejson's class depending on what is installed;
                # both are ValueError subclasses, so catch the common base.
                body_text = response.text.strip() if response.text else ""
                if body_text:
                    logger.info(f"[Supadata] Successfully fetched transcript (plain text) for {video_id}. Length: {len(response.text)}")
                    return body_text
                logger.error(f"[Supadata] Failed to decode JSON response (and no text body) for {video_id}. Response: {response.text[:200]}...")
                return None
            except Exception as e:
                logger.error(f"[Supadata] Error processing successful response for {video_id}: {e}", exc_info=True)
                return None
        elif response.status_code in (401, 403):
            logger.error(f"[Supadata] Authentication error ({response.status_code}). Check API key.")
            return None
        elif response.status_code == 404:
            logger.warning(f"[Supadata] Transcript not found ({response.status_code}) for {video_id}.")
            return None
        else:
            logger.error(f"[Supadata] Unexpected status code {response.status_code} for {video_id}. Response: {response.text[:200]}...")
            return None
    except requests.exceptions.Timeout:
        logger.error(f"[Supadata] Timeout error connecting to API for {video_id}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"[Supadata] Request error connecting to API for {video_id}: {e}")
        if isinstance(e, requests.exceptions.SSLError):
            logger.error(f"[Supadata] SSL Error occurred despite using verify=False. Details: {e}")
        return None
    except Exception as e:
        logger.error(f"[Supadata] Unexpected error during API call for {video_id}: {e}", exc_info=True)
        return None
156
-
157
# Apify Transcript Fetching (with fixed fallback parsing)
async def get_transcript_via_apify(video_url: str, api_token: str):
    """Retrieve a YouTube transcript by running an Apify actor synchronously.

    Posts the actor input to the ``run-sync-get-dataset-items`` endpoint in a
    worker thread and reads the transcript from the first dataset item. The
    ``text``, ``transcript`` and ``captions_concatenated`` fields are tried
    first; a ``captions`` field (string or list of dicts) is the fallback.

    Args:
        video_url: Full YouTube URL passed to the actor.
        api_token: Apify token, sent as the ``token`` query parameter.

    Returns:
        The HTML-unescaped transcript string, or ``None`` on any failure.
        Failures are logged and never raised.
    """
    if not video_url:
        logger.error("[Apify] get_transcript_via_apify called with no video_url")
        return None
    if not api_token:
        logger.error("[Apify] API token is missing.")
        return None
    if not ApifyClient:
        logger.error("[Apify] ApifyClient not available/imported.")
        return None

    logger.info(f"[Apify] Attempting fetch for URL: {video_url}")
    actor_id = "karamelo~youtube-transcripts"
    run_url = f"https://api.apify.com/v2/acts/{actor_id}/run-sync-get-dataset-items"
    query = {"token": api_token}
    actor_input = {
        "urls": [video_url],
        "outputFormat": "singleStringText",
        "maxRetries": 3,
        "channelHandleBoolean": False,
        "channelNameBoolean": False,
        "datePublishedBoolean": False,
        "relativeDateTextBoolean": False,
    }
    body = json.dumps(actor_input)
    request_headers = {"Content-Type": "application/json"}
    primary_keys = ("text", "transcript", "captions_concatenated")

    try:
        logger.debug(f"[Apify] Sending request to run actor {actor_id} synchronously for {video_url}")
        response = await asyncio.to_thread(
            requests.post,
            run_url,
            headers=request_headers,
            params=query,
            data=body,
            timeout=90,
        )
        logger.debug(f"[Apify] Received status code {response.status_code} for {video_url}")

        # Dispose of the failure statuses first so the success path reads flat.
        if response.status_code == 400:
            logger.error(f"[Apify] Bad Request (400) for {video_url}. Resp: {response.text[:200]}...")
            return None
        if response.status_code == 401:
            logger.error("[Apify] Auth error (401). Check token.")
            return None
        if response.status_code not in (200, 201):
            logger.error(f"[Apify] Unexpected status {response.status_code} for {video_url}. Resp: {response.text[:200]}...")
            return None

        try:
            results = response.json()
            if not (isinstance(results, list) and results):
                logger.warning(f"[Apify] Actor success ({response.status_code}) but dataset result list empty for {video_url}. Response: {results}")
                return None

            first = results[0]

            # First truthy primary field wins.
            transcript = None
            for key in primary_keys:
                transcript = first.get(key)
                if transcript:
                    break

            raw_captions = first.get("captions")
            if not transcript and raw_captions:
                if isinstance(raw_captions, str):
                    logger.info("[Apify] Processing 'captions' string format as fallback.")
                    transcript = raw_captions.strip()
                    # A short string mentioning "error" is treated as an error
                    # message rather than a transcript.
                    if len(transcript) < 50 and "error" in transcript.lower():
                        logger.warning(f"[Apify] 'captions' string looks like an error: {transcript}")
                        transcript = None
                elif isinstance(raw_captions, list):
                    logger.info("[Apify] Processing 'captions' list format as fallback.")
                    pieces = []
                    for cap in raw_captions:
                        if isinstance(cap, dict) and cap.get("text"):
                            pieces.append(cap.get("text", ""))
                    transcript = " ".join(pieces).strip()
                else:
                    logger.warning(f"[Apify] 'captions' field found but is neither string nor list: {type(raw_captions)}")
                    transcript = None

            if transcript:
                try:
                    transcript = html.unescape(transcript)  # Use imported html module
                except Exception as unescape_err:
                    logger.warning(f"[Apify] Error during html unescaping: {unescape_err}")

            if transcript and isinstance(transcript, str):
                logger.info(f"[Apify] Successfully fetched transcript via run-sync for {video_url} (Status: {response.status_code}). Length: {len(transcript)}")
                return transcript

            # Nothing usable: log which stage came up empty.
            if any(first.get(key) for key in primary_keys):
                logger.warning(f"[Apify] Actor success ({response.status_code}) but primary fields empty for {video_url}.")
            elif not raw_captions:
                logger.warning(f"[Apify] Actor success ({response.status_code}) but no relevant fields found for {video_url}. Item: {first}")
            else:
                logger.warning(f"[Apify] Actor success ({response.status_code}), 'captions' found but fallback parsing failed for {video_url}.")
            return None
        except json.JSONDecodeError:
            logger.error(f"[Apify] Failed JSON decode for {video_url}. Status: {response.status_code}. Resp: {response.text[:200]}...")
            return None
        except Exception as e:
            logger.error(f"[Apify] Error processing successful response ({response.status_code}) for {video_url}: {e}", exc_info=True)
            return None

    except requests.exceptions.Timeout:
        logger.error(f"[Apify] Timeout error running actor for {video_url}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"[Apify] Request error running actor for {video_url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Apify] Unexpected error during Apify call for {video_url}: {e}", exc_info=True)
        return None
236
-
237
# Combined YouTube Transcript Function
async def get_youtube_transcript(video_id: str, video_url: str, supadata_key: str | None, apify_token: str | None):
    """Fetch a YouTube transcript, trying three sources in order.

    Order: the ``youtube-transcript-api`` library, then the Supadata API,
    then the Apify API. The first source that yields non-empty text wins.
    A source that returns an empty result (``None`` or ``""``) counts as a
    failure and the next source is tried.

    Args:
        video_id: YouTube video ID (used by the library and Supadata).
        video_url: Full video URL (used by Apify and for logging).
        supadata_key: Supadata API key; the Supadata step is skipped if falsy.
        apify_token: Apify API token; the Apify step is skipped if falsy.

    Returns:
        The transcript text, or ``None`` when every source fails.
    """
    if not video_id:
        logger.error("get_youtube_transcript called with no video_id")
        return None
    logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")

    # Primary: youtube-transcript-api library (blocking, so run in a thread).
    try:
        logger.info("[Primary YT] Attempting youtube-transcript-api...")
        transcript_list = await asyncio.to_thread(
            YouTubeTranscriptApi.get_transcript,
            video_id,
            languages=['en', 'en-GB', 'en-US'],
        )
        if transcript_list:
            joined = " ".join(item['text'] for item in transcript_list if 'text' in item)
            transcript_text = re.sub(r'\s+', ' ', joined).strip()
            if transcript_text:
                logger.info(f"[Primary YT] Success via library. Length: {len(transcript_text)}")
                return transcript_text
            logger.warning("[Primary YT] Joined text empty after cleaning.")
        else:
            logger.warning("[Primary YT] Transcript list empty.")
    except Exception as e:
        # Best effort: any library failure just moves on to the fallbacks.
        logger.warning(f"[Primary YT] Error via library: {type(e).__name__} - {e}")
        error_text = str(e)
        if "YouTube is blocking requests" in error_text or "HTTP Error 429" in error_text:
            logger.warning("[Primary YT] IP likely blocked.")
        elif "No transcript found" in error_text:
            logger.warning("[Primary YT] No transcript in specified languages.")
        elif "TranscriptsDisabled" in error_text or "disabled" in error_text:
            logger.warning("[Primary YT] Transcripts disabled for this video.")

    # Fallback 1: Supadata
    logger.info("[Fallback YT 1] Trying Supadata API...")
    if supadata_key:
        transcript_text = await get_transcript_via_supadata(video_id, supadata_key)
        if transcript_text:
            logger.info(f"[Fallback YT 1] Success via Supadata. Length: {len(transcript_text)}")
            return transcript_text
        logger.warning("[Fallback YT 1] Supadata failed or no content found.")
    else:
        logger.warning("[Fallback YT 1] Supadata key not available, skipping.")

    # Fallback 2: Apify. Reached on any falsy Supadata result, including an
    # empty string, so an empty transcript never short-circuits the chain.
    logger.info("[Fallback YT 2] Trying Apify API...")
    if apify_token:
        transcript_text = await get_transcript_via_apify(video_url, apify_token)
        if transcript_text:
            logger.info(f"[Fallback YT 2] Success via Apify. Length: {len(transcript_text)}")
            return transcript_text
        logger.warning("[Fallback YT 2] Apify failed or no content found.")
    else:
        logger.warning("[Fallback YT 2] Apify token not available, skipping.")

    logger.error(f"All methods failed to fetch transcript for video ID: {video_id}")
    return None
277
-
278
# Website Content via Requests/BS4
async def get_website_content_via_requests(url):
    """Download a page with requests and reduce it to readable text.

    The GET runs in a worker thread. HTML responses are parsed with
    BeautifulSoup; boilerplate tags and common ad/navigation selectors are
    removed, then text is taken from the first main-content candidate found
    (falling back to ``<body>``). ``text/plain`` responses are returned
    stripped; other non-HTML content types are rejected.

    Args:
        url: Address of the page to fetch.

    Returns:
        The cleaned text (non-empty lines joined by blank lines), or ``None``
        when the fetch fails, the content type is unsuitable, or fewer than
        100 characters of text remain. Failures are logged and never raised.
    """
    if not url:
        logger.error("[Web Scraper - Requests/BS4] get_website_content_via_requests called with no URL")
        return None
    logger.info(f"[Web Scraper - Requests/BS4] Attempting fetch: {url}")

    MIN_TEXT_LENGTH = 100
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
    }
    unwanted_tags = [
        "script", "style", "header", "footer", "nav", "aside", "form", "button",
        "input", "textarea", "select", "option", "label", "iframe", "img", "svg",
        "link", "meta", "noscript", "figure", "figcaption", "video", "audio",
        "picture", "source",
    ]
    unwanted_selectors = [
        '.ad', '#ad', '.ads', '#ads', '.advertisement', '#advertisement',
        '.banner', '#banner', '.menu', '#menu', '.navigation', '#navigation',
        '.sidebar', '#sidebar', '.social', '#social', '.share', '#share',
        '.related', '#related', '.comments', '#comments',
        '.cookie-consent', '#cookie-consent',
    ]
    # Main-content lookups, tried in order; the first match is used.
    content_lookups = (
        {"name": "main"},
        {"name": "article"},
        {"id": "content"},
        {"class_": "content"},
        {"id": "main-content"},
        {"class_": "main-content"},
        {"role": "main"},
    )

    try:
        response = await asyncio.to_thread(
            requests.get, url, headers=browser_headers, timeout=25, allow_redirects=True
        )
        response.raise_for_status()
        logger.debug(f"[Web Scraper - Requests/BS4] Status {response.status_code} for {url}")

        content_type = response.headers.get('content-type', '').lower()
        if 'html' not in content_type:
            logger.warning(f"[Web Scraper - Requests/BS4] Non-HTML content type received: {content_type}. Attempting plain text extraction.")
            if 'text/plain' in content_type and response.text:
                plain = response.text.strip()
                logger.info(f"[Web Scraper - Requests/BS4] Extracted plain text content. Length: {len(plain)}")
                return plain
            logger.warning(f"[Web Scraper - Requests/BS4] Content type '{content_type}' not suitable for parsing. Aborting.")
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        for node in soup(unwanted_tags):
            node.decompose()
        for css in unwanted_selectors:
            for node in soup.select(css):
                node.decompose()

        container = None
        for lookup in content_lookups:
            container = soup.find(**lookup)
            if container:
                break
        if not container:
            container = soup.body
        if not container:
            logger.warning(f"[Web Scraper - Requests/BS4] Could not find a suitable target element (main, article, body) for {url}")
            return None

        kept_lines = []
        for raw_line in container.get_text(separator='\n', strip=True).splitlines():
            stripped = raw_line.strip()
            if stripped:
                kept_lines.append(stripped)
        text = "\n\n".join(kept_lines)

        if len(text) < MIN_TEXT_LENGTH:
            logger.warning(f"[Web Scraper - Requests/BS4] Extracted text is too short (<{MIN_TEXT_LENGTH} chars) after cleaning for {url}. Length: {len(text)}. Content might be JS-rendered or blocked.")
            return None

        logger.info(f"[Web Scraper - Requests/BS4] Successfully scraped and cleaned content from {url}. Final Length: {len(text)}")
        return text

    except requests.exceptions.Timeout:
        logger.error(f"[Web Scraper - Requests/BS4] Timeout error fetching {url}")
        return None
    except requests.exceptions.TooManyRedirects:
        logger.error(f"[Web Scraper - Requests/BS4] Too many redirects error for {url}")
        return None
    except requests.exceptions.HTTPError as e:
        logger.error(f"[Web Scraper - Requests/BS4] HTTP error {e.response.status_code} for {url}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"[Web Scraper - Requests/BS4] General request error for {url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Web Scraper - Requests/BS4] Error during parsing or processing {url}: {e}", exc_info=True)
        return None
334
-
335
# Website Content via URLToText API
async def get_website_content_via_urltotext_api(url: str, api_key: str):
    """Extract a page's main text through the URLToText HTTP API.

    Posts a JSON request (JavaScript rendering and main-content extraction
    enabled) in a worker thread and reads ``data.content`` from the reply.

    Args:
        url: Page to extract.
        api_key: URLToText key, sent as ``Authorization: Token <key>``.

    Returns:
        The stripped content string, or ``None`` on any failure (missing
        argument, API-reported error, non-200 status, invalid payload,
        network error). Failures are logged and never raised.
    """
    if not url:
        logger.error("[Web Scraper - URLToText API] get_website_content_via_urltotext_api called with no URL")
        return None
    if not api_key:
        logger.error("[Web Scraper - URLToText API] API key is missing.")
        return None

    logger.info(f"[Web Scraper - URLToText API] Attempting fetch via API: {url}")
    service_url = "https://urltotext.com/api/v1/urltotext/"
    request_body = json.dumps({
        "url": url,
        "output_format": "text",
        "extract_main_content": True,
        "render_javascript": True,
        "residential_proxy": False,
        "timeout_render": 20000,
    })
    request_headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}

    try:
        response = await asyncio.to_thread(
            requests.post, service_url, headers=request_headers, data=request_body, timeout=60
        )
        status = response.status_code
        logger.debug(f"[Web Scraper - URLToText API] Received status code {status} for {url}")

        if status != 200:
            # Every non-200 outcome is log-and-give-up; only the wording and
            # the log level differ per status.
            snippet = response.text[:200]
            known_failures = {
                400: (logger.error, f"[Web Scraper - URLToText API] Bad Request (400) to API. Check payload/URL. Response: {snippet}..."),
                401: (logger.error, f"[Web Scraper - URLToText API] Unauthorized (401). Check API Key. Response: {snippet}..."),
                402: (logger.error, f"[Web Scraper - URLToText API] Payment Required (402). Check API credits/plan. Response: {snippet}..."),
                422: (logger.warning, f"[Web Scraper - URLToText API] Unprocessable URL / Fetch Error (422) reported by API for {url}. Response: {snippet}..."),
                429: (logger.warning, f"[Web Scraper - URLToText API] Rate Limit Hit (429). Response: {snippet}..."),
            }
            if status in known_failures:
                emit, text = known_failures[status]
                emit(text)
            elif status >= 500:
                logger.error(f"[Web Scraper - URLToText API] API Server Error ({status}). Response: {snippet}...")
            else:
                logger.error(f"[Web Scraper - URLToText API] Unexpected status code {status} from API. Response: {snippet}...")
            return None

        try:
            data = response.json()
            inner = data.get("data", {})
            content = inner.get("content")
            credits_used = data.get("credits_used", "N/A")
            api_warning = inner.get("warning")
            api_error = inner.get("error")

            if api_warning:
                logger.warning(f"[Web Scraper - URLToText API] API Warning for {url}: {api_warning}")
            if api_error:
                logger.error(f"[Web Scraper - URLToText API] API Error reported for {url}: {api_error}")
                return None

            if not (content and isinstance(content, str)):
                logger.warning(f"[Web Scraper - URLToText API] API returned status 200 but content is empty or invalid for {url}. Response: {data}")
                return None

            cleaned = content.strip()
            logger.info(f"[Web Scraper - URLToText API] Successfully fetched content via API. Length: {len(cleaned)}. Credits Used: {credits_used}")
            return cleaned
        except json.JSONDecodeError:
            logger.error(f"[Web Scraper - URLToText API] Failed to decode JSON response from API. Status: {response.status_code}. Response Text: {response.text[:500]}...")
            return None
        except Exception as e:
            logger.error(f"[Web Scraper - URLToText API] Error processing successful API response: {e}", exc_info=True)
            return None

    except requests.exceptions.Timeout:
        logger.error(f"[Web Scraper - URLToText API] Timeout connecting to API for {url}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"[Web Scraper - URLToText API] Request error connecting to API: {e}")
        return None
    except Exception as e:
        logger.error(f"[Web Scraper - URLToText API] Unexpected error during API call: {e}", exc_info=True)
        return None
385
-
386
# DeepSeek Summary Function (with updated prompts)
async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
    """Summarise ``text`` with a DeepSeek model through the OpenRouter API.

    Over-long input is truncated (with a marker) to an estimated character
    budget before sending. The blocking HTTP POST runs in a worker thread.

    Args:
        text: Content to summarise.
        summary_type: ``"paragraph"`` or ``"points"``; selects the prompt.
        api_key: OpenRouter key, sent as a Bearer token.

    Returns:
        The summary on success. On any failure a user-facing message starting
        with ``"Error:"`` or ``"Sorry,"`` is returned instead; this function
        never raises.
    """
    # Guard the length lookup: the original logged len(text) before checking
    # for empty input, so text=None raised TypeError.
    logger.info(f"Generating '{summary_type}' summary. Input length: {len(text) if text else 0}")
    if not api_key:
        logger.error("OpenRouter API key missing.")
        return "Error: AI service configuration key is missing."
    if not text or not text.strip():
        logger.warning("generate_summary called with empty or whitespace-only text.")
        return "Error: No content was provided to summarize."

    openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
    model_name = "deepseek/deepseek-chat:free"

    if summary_type == "paragraph":
        system_message = (
            "You are an expert summarization AI. Your goal is to provide a concise, easy-to-understand summary of the provided text. "
            "Follow these instructions precisely:\n"
            "1. **Language and Spelling:** Use simple British English. Ensure all spellings conform to British English (e.g., 'summarise', 'centre', 'realise').\n"
            "2. **Clarity:** Write clearly so someone unfamiliar with the topic can understand.\n"
            "3. **Format:** Output a single paragraph only.\n"
            "4. **Conciseness:** The summary must be **no more than 85 words** long.\n"
            "5. **Completeness:** Cover the main points from the entire text, not just the start.\n"
            "6. **Punctuation:** Do NOT use em dashes (– or —). Use semicolons (;) if needed for complex sentence structure, but prefer simpler sentences.\n"
            "7. **Tone:** Maintain a neutral and informative tone.\n"
            "8. **Focus:** Extract factual information and key topics. Do not add opinions or information not present in the text."
        )
        user_prompt_instruction = "Summarize the following text into a single paragraph adhering strictly to the rules outlined in the system message:"
    elif summary_type == "points":
        system_message = (
            "You are an expert summarization AI. Your goal is to extract the key points from the provided text and present them as a bulleted list. "
            "Follow these instructions precisely:\n"
            "1. **Language and Spelling:** Use simple British English. Ensure all spellings conform to British English (e.g., 'summarise', 'centre', 'realise').\n"
            "2. **Clarity:** Write clearly so someone unfamiliar with the topic can understand.\n"
            "3. **Format:** Output as a bulleted list. Start each point with a standard bullet character ('*' or '-'). Each point should be distinct and on a new line.\n"
            "4. **Content:** Each bullet point should represent a single key finding, main topic, or significant piece of information from the text.\n"
            "5. **Conciseness:** Keep each bullet point brief and to the point.\n"
            "6. **Completeness:** Cover the main points from the entire text, not just the start.\n"
            "7. **Punctuation:** Do NOT use em dashes (– or —) within bullet points.\n"
            "8. **Tone:** Maintain a neutral and informative tone.\n"
            "9. **Focus:** Extract factual information and key topics. Do not add opinions or information not present in the text."
        )
        user_prompt_instruction = "Summarize the following text into a bulleted list adhering strictly to the rules outlined in the system message:"
    else:
        logger.error(f"Invalid summary_type '{summary_type}' requested.")
        return f"Error: Invalid summary type ('{summary_type}') requested. Please choose 'paragraph' or 'points'."

    # Rough character budget derived from an estimated token limit.
    MAX_INPUT_TOKENS_ESTIMATE = 28000
    AVG_CHARS_PER_TOKEN = 4
    MAX_INPUT_LENGTH = MAX_INPUT_TOKENS_ESTIMATE * AVG_CHARS_PER_TOKEN

    if len(text) > MAX_INPUT_LENGTH:
        logger.warning(f"Input text length ({len(text)} chars) exceeds estimated limit ({MAX_INPUT_LENGTH}). Truncating.")
        truncation_marker = "\n\n[... Text truncated due to length ...]"
        text = text[:MAX_INPUT_LENGTH - len(truncation_marker)] + truncation_marker

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"{user_prompt_instruction}\n\n--- TEXT TO SUMMARIZE ---\n\n{text}\n\n--- END OF TEXT ---"},
    ]

    space_host = os.environ.get("SPACE_HOST", "huggingface.co/spaces/YOUR_SPACE_NAME")
    if space_host and not space_host.startswith("http"):
        referer_url = f"https://{space_host}"
    else:
        referer_url = space_host or "https://huggingface.co"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": referer_url,
        "X-Title": "Telegram URL Summarizer Bot",
    }
    payload = json.dumps({"model": model_name, "messages": messages})

    try:
        logger.debug(f"Sending request to OpenRouter (Model: {model_name}). Prompt length approx: {len(text)} chars.")
        response = await asyncio.to_thread(
            requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=120
        )
        status = response.status_code
        logger.debug(f"Received status {status} from OpenRouter.")

        if status == 200:
            try:
                data = response.json()
                choice = data.get("choices", [{}])[0]
                message = choice.get("message", {})
                summary = message.get("content")
                finish_reason = choice.get("finish_reason")

                if summary and isinstance(summary, str) and summary.strip():
                    summary = summary.strip()
                    logger.info(f"Successfully generated summary. Finish Reason: {finish_reason}. Length: {len(summary)}")
                    word_count = len(summary.split())
                    if summary_type == "paragraph" and word_count > 95:
                        logger.warning(f"Generated paragraph summary slightly longer than target word count ({word_count} words).")
                    return summary

                logger.warning(f"OpenRouter returned status 200 but summary content is missing or empty. Response data: {data}")
                return "Sorry, the AI model returned an empty summary. The content might have been unsuitable."
            except (ValueError, IndexError, KeyError, AttributeError, TypeError) as e:
                # ValueError covers both json's and simplejson's JSONDecodeError
                # (requests may raise either); TypeError covers e.g. a null
                # "choices" value.
                logger.error(f"Failed to parse successful (200) response from OpenRouter. Error: {e}. Response Text: {response.text[:500]}...", exc_info=True)
                return "Sorry, there was an issue parsing the response from the AI service."
            except Exception as e:
                logger.error(f"Unexpected error processing OpenRouter success response: {e}", exc_info=True)
                return "Sorry, an unexpected error occurred while processing the AI response."

        if status == 401:
            logger.error("OpenRouter API key is invalid (Unauthorized - 401).")
            return "Error: AI service authentication failed. Please check the configuration."
        if status == 402:
            logger.error("OpenRouter Payment Required (402). Check credits/limits.")
            return "Sorry, there's an issue with the AI service account limits or payment."
        if status == 429:
            logger.warning("OpenRouter Rate Limit Hit (429).")
            return "Sorry, the AI model is currently busy due to high demand. Please try again in a moment."
        if status == 400:
            logger.error(f"OpenRouter Bad Request (400). Likely prompt issue. Response: {response.text[:500]}...")
            return "Sorry, the request to the AI service was invalid (possibly due to the content or prompt)."
        if status >= 500:
            logger.error(f"OpenRouter Server Error ({status}). Response: {response.text[:500]}...")
            return "Sorry, the AI service is experiencing internal issues. Please try again later."

        logger.error(f"Unexpected HTTP status {status} from OpenRouter. Response: {response.text[:500]}...")
        try:
            error_data = response.json()
        except ValueError:
            return f"Sorry, the AI service returned an unexpected error (Status: {status})."
        # The error payload shape is not guaranteed; only read "message" when
        # the structure is the expected nested dict.
        error_info = error_data.get("error", {}) if isinstance(error_data, dict) else {}
        if isinstance(error_info, dict):
            error_msg = error_info.get("message", response.text[:100])
        else:
            error_msg = response.text[:100]
        return f"Sorry, the AI service returned an error ({status}): {error_msg}"

    except requests.exceptions.Timeout:
        logger.error("Timeout connecting to OpenRouter API.")
        return "Sorry, the request to the AI model timed out. Please try again."
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error connecting to OpenRouter API: {e}")
        return "Sorry, there was a network error connecting to the AI model service."
    except Exception as e:
        logger.error(f"Unexpected error occurred within generate_summary function: {e}", exc_info=True)
        return "Sorry, an unexpected internal error occurred while generating the summary."
501
-
502
-
503
# --- Background Task Processing ---
async def process_summary_task(
    user_id: int,
    chat_id: int,
    message_id_to_edit: int,
    url: str,
    summary_type: str,
    bot: Bot  # Pass the Bot instance
) -> None:
    """Fetch the content behind ``url``, summarise it and report the result to the chat.

    Intended to run as a background task scheduled by the callback handler.

    Args:
        user_id: Telegram user id; used for logging only.
        chat_id: Chat that receives status, summary and error messages.
        message_id_to_edit: Id of the message carrying the format buttons. It is
            edited into a "Working..." status; if that edit fails, a separate
            status message is sent instead and this id is no longer touched.
        url: Link to summarise (YouTube video or web page).
        summary_type: Summary format chosen by the user; passed to ``generate_summary``.
        bot: Bot instance used for every Telegram call.

    Returns:
        None. Every outcome is communicated through Telegram messages and logs;
        exceptions raised by the processing steps are caught and reported.
    """
    # Resolve the current task once instead of calling current_task() three times.
    current = asyncio.current_task()
    task_id = current.get_name() if hasattr(current, 'get_name') else id(current)
    logger.info(f"[Task {task_id}] Starting processing for user {user_id}, chat {chat_id}, msg {message_id_to_edit}, type '{summary_type}'")

    # Fetch current API keys within the task
    current_openrouter_key = os.environ.get('OPENROUTER_API_KEY')
    current_urltotext_key = os.environ.get('URLTOTEXT_API_KEY')
    current_supadata_key = os.environ.get('SUPADATA_API_KEY')
    current_apify_token = os.environ.get('APIFY_API_TOKEN')
    # Log only presence (Y/N) of each key, never the key values themselves.
    keys_present = f"OR={'Y' if current_openrouter_key else 'N'}, UTT={'Y' if current_urltotext_key else 'N'}, SD={'Y' if current_supadata_key else 'N'}, AP={'Y' if current_apify_token else 'N'}"
    logger.debug(f"[Task {task_id}] API Key check: {keys_present}")

    if not current_openrouter_key:
        logger.error(f"[Task {task_id}] CRITICAL: OpenRouter API key is missing. Cannot generate summary.")
        try:
            # Edit the original message to show the config error
            await bot.edit_message_text(
                chat_id=chat_id,
                message_id=message_id_to_edit,
                text="❌ Configuration Error: The AI summarization service is not configured correctly. Please contact the administrator."
            )
        except Exception as edit_err:
            logger.error(f"[Task {task_id}] Failed to edit message for missing AI key: {edit_err}")
        return  # Stop task

    # --- Inform User Processing Has Started ---
    processing_message_text = f"⏳ Working on your '{summary_type}' summary for the link...\n_(This might take up to a minute depending on the content)_"
    status_message_sent_id = None  # Set only if a separate status message had to be sent

    try:
        await bot.edit_message_text(
            chat_id=chat_id,
            message_id=message_id_to_edit,
            text=processing_message_text
        )
        logger.debug(f"[Task {task_id}] Successfully edited message {message_id_to_edit} to 'Working...'")
    except (TimedOut, NetworkError, BadRequest) as e:
        # Editing failed; fall back to a fresh status message.
        logger.warning(f"[Task {task_id}] Could not edit original message {message_id_to_edit}: {e!r}. Sending new status message.")
        message_id_to_edit = None  # Original message must not be edited/deleted during cleanup
        try:
            status_message = await bot.send_message(chat_id=chat_id, text=processing_message_text)
            status_message_sent_id = status_message.message_id
            logger.debug(f"[Task {task_id}] Sent new status message {status_message_sent_id}.")
        except Exception as send_err:
            # Proceed anyway; the user simply gets no progress feedback.
            logger.error(f"[Task {task_id}] Failed sending NEW 'Working...' status message: {send_err}. Processing continues without feedback.")
    except Exception as e:
        logger.error(f"[Task {task_id}] Unexpected error editing message {message_id_to_edit}: {e!r}", exc_info=True)
        message_id_to_edit = None  # Assume original message is problematic

    # --- Main Content Fetching and Summarization ---
    content = None
    user_feedback_message = None
    success = False

    try:
        # Typing indicators are best-effort: a failure is logged and never aborts the task.
        try:
            logger.debug(f"[Task {task_id}] Sending 'typing' chat action to chat {chat_id}")
            await bot.send_chat_action(chat_id=chat_id, action='typing')
        except Exception as ca_err:
            logger.warning(f"[Task {task_id}] Failed sending 'typing' action: {ca_err}")

        # --- Determine Content Type and Fetch ---
        is_yt = is_youtube_url(url)
        logger.debug(f"[Task {task_id}] URL ({url}) is YouTube: {is_yt}")

        if is_yt:
            video_id = extract_youtube_id(url)
            if video_id:
                logger.info(f"[Task {task_id}] Fetching YouTube transcript for video ID: {video_id}")
                content = await get_youtube_transcript(video_id, url, current_supadata_key, current_apify_token)
                if not content:
                    logger.warning(f"[Task {task_id}] Failed to get YouTube transcript for {video_id}.")
                    user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be unavailable, private, have captions disabled, or an error occurred."
                else:
                    logger.info(f"[Task {task_id}] Successfully fetched YouTube transcript for {video_id}. Length: {len(content)}")
            else:
                logger.warning(f"[Task {task_id}] Failed to extract YouTube video ID from URL: {url}")
                user_feedback_message = "⚠️ Sorry, I couldn't identify a valid YouTube video ID in the link you provided."
        else:
            # --- Website Scraping ---
            logger.info(f"[Task {task_id}] Attempting website scrape (Requests/BS4) for URL: {url}")
            content = await get_website_content_via_requests(url)
            if content:
                logger.info(f"[Task {task_id}] Website scrape successful (Requests/BS4). Length: {len(content)}")
            else:
                logger.warning(f"[Task {task_id}] Primary website scrape failed for {url}. Trying fallback API.")
                if current_urltotext_key:
                    # `except Exception` (not a bare except) so task cancellation still propagates.
                    try:
                        await bot.send_chat_action(chat_id=chat_id, action='typing')
                        logger.debug(f"[Task {task_id}] Sent typing before fallback scrape.")
                    except Exception as ca_err:
                        logger.debug(f"[Task {task_id}] Could not send typing before fallback scrape: {ca_err!r}")

                    logger.info(f"[Task {task_id}] Attempting website scrape via URLToText API for: {url}")
                    content = await get_website_content_via_urltotext_api(url, current_urltotext_key)
                    if content:
                        logger.info(f"[Task {task_id}] Website scrape successful via URLToText API. Length: {len(content)}")
                    else:
                        logger.warning(f"[Task {task_id}] Fallback website scrape (URLToText API) also failed for {url}.")
                        user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website using available methods. It might be protected or structured in a way I can't parse."
                else:
                    logger.warning(f"[Task {task_id}] Primary scrape failed and URLToText API key not configured. Cannot fallback for {url}.")
                    user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website, and the fallback service isn't configured."

        # --- Generate Summary if Content Was Fetched ---
        if content:
            logger.info(f"[Task {task_id}] Content fetched (Length: {len(content)}). Generating '{summary_type}' summary.")
            try:
                await bot.send_chat_action(chat_id=chat_id, action='typing')
                logger.debug(f"[Task {task_id}] Sent typing before AI summary generation.")
            except Exception as ca_err:
                logger.debug(f"[Task {task_id}] Could not send typing before AI summary generation: {ca_err!r}")

            summary = await generate_summary(content, summary_type, current_openrouter_key)

            # generate_summary reports failures as text starting with these prefixes.
            if summary.startswith(("Error:", "Sorry,")):
                logger.warning(f"[Task {task_id}] AI summary generation failed. Reason: {summary}")
                user_feedback_message = f"⚠️ {summary}"
            else:
                # --- Summary Success - Send to User ---
                logger.info(f"[Task {task_id}] Summary generated successfully. Length: {len(summary)}. Sending result.")
                try:
                    await bot.send_message(
                        chat_id=chat_id,
                        text=summary,
                        parse_mode=ParseMode.MARKDOWN,
                        link_preview_options={'is_disabled': True}
                    )
                    success = True
                    user_feedback_message = None
                    logger.info(f"[Task {task_id}] Successfully sent summary to chat {chat_id}.")
                except Exception as send_final_err:
                    logger.error(f"[Task {task_id}] Failed sending final summary to chat {chat_id}: {send_final_err}", exc_info=True)
                    user_feedback_message = "⚠️ Sorry, an unexpected error occurred while sending the final summary."
                    success = False

        elif not user_feedback_message:
            # Defensive default: no content and no branch above explained why.
            logger.warning(f"[Task {task_id}] Content retrieval resulted in None, but no specific user feedback message was set. URL: {url}")
            user_feedback_message = "⚠️ Sorry, I couldn't retrieve any usable content from the link provided."

        # --- Send Final Feedback Message if Processing Failed ---
        if user_feedback_message and not success:
            logger.warning(f"[Task {task_id}] Processing failed or summary sending failed. Sending feedback: {user_feedback_message}")
            try:
                await bot.send_message(chat_id=chat_id, text=user_feedback_message)
            except Exception as send_feedback_err:
                logger.error(f"[Task {task_id}] Failed sending final FAILURE feedback message to chat {chat_id}: {send_feedback_err}")

    except Exception as e:
        # Catch-all for unexpected errors during the main processing block
        logger.error(f"[Task {task_id}] Unexpected critical error during task processing for user {user_id}, URL {url}: {e}", exc_info=True)
        try:
            await bot.send_message(chat_id=chat_id, text="❌ Oops! An unexpected internal error occurred while processing your request. The issue has been logged.")
        except Exception as final_err:
            logger.error(f"[Task {task_id}] Failed sending CRITICAL internal error feedback message to chat {chat_id}: {final_err}")
        success = False

    finally:
        # --- Clean up Status Message(s) ---
        logger.debug(f"[Task {task_id}] Cleaning up status message(s). Success={success}")
        try:
            if status_message_sent_id:
                # A separate "Working..." message was sent: remove it.
                await bot.delete_message(chat_id=chat_id, message_id=status_message_sent_id)
                logger.debug(f"[Task {task_id}] Deleted separate status message {status_message_sent_id}.")
            elif message_id_to_edit:
                if success:
                    # The summary was delivered; the edited status message is no longer needed.
                    await bot.delete_message(chat_id=chat_id, message_id=message_id_to_edit)
                    logger.debug(f"[Task {task_id}] Processing succeeded. Deleted original (edited) message {message_id_to_edit}.")
                else:
                    # Replace the "Working..." text with the failure reason.
                    logger.debug(f"[Task {task_id}] Processing failed. Attempting to edit message {message_id_to_edit} to show error.")
                    final_error_text = user_feedback_message or "❌ An error occurred."
                    try:
                        # Truncate error message if needed
                        await bot.edit_message_text(chat_id=chat_id, message_id=message_id_to_edit, text=final_error_text[:4090])
                    except Exception as final_edit_err:
                        logger.warning(f"[Task {task_id}] Could not edit message {message_id_to_edit} to show final error state: {final_edit_err!r}")
            # Neither id set: the initial edit failed and no replacement was sent, nothing to clean up.
        except Exception as del_e:
            logger.warning(f"[Task {task_id}] Could not delete status/button message during cleanup: {del_e!r}")

        logger.info(f"[Task {task_id}] Finished task processing for user {user_id}. Overall Success: {success}")
697
 
698
  # --- Telegram Bot Handlers ---
699
-
700
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
701
  """Handles the /start command."""
702
  user = update.effective_user
703
  if not user: return
704
- logger.info(f"User {user.id} ({user.username or 'NoUsername'}) initiated /start.")
705
  mention = user.mention_html() if user.username else user.first_name
706
  start_message = (
707
  f"👋 Hello {mention}!\n\n"
@@ -717,15 +290,14 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> No
717
  logger.info(f"User {user.id if user else '?'} requested /help.")
718
  help_text = (
719
  "**How to Use Me:**\n"
720
- "1. Send me a direct link (URL) to a YouTube video or a web article.\n"
721
- "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n"
722
- "3. Click the button for your preferred format.\n"
723
- "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
724
  "**Important Notes:**\n"
725
- "- **YouTube:** Getting transcripts can sometimes fail if they are disabled, unavailable for the video's language, or if YouTube temporarily blocks requests.\n"
726
- "- **Websites:** I do my best to extract the main article content, but complex websites (especially those heavily reliant on JavaScript or with strong anti-scraping measures) might not work perfectly. I have a fallback service to help with tricky sites.\n"
727
- "- **AI Summaries:** The AI tries its best to be accurate and follow the requested format, but errors or unexpected outputs are possible.\n"
728
- "- **Length:** Very long articles or videos might be truncated before summarization to fit within processing limits.\n\n"
729
  "Just send a link to get started!"
730
  )
731
  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
@@ -742,9 +314,8 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
742
 
743
  if match:
744
  url = match.group(0)
745
- logger.info(f"User {user.id} sent potential URL: {url}")
746
  context.user_data['url_to_summarize'] = url
747
- logger.debug(f"Stored URL '{url}' in user_data for user {user.id}")
748
 
749
  keyboard = [
750
  [
@@ -759,123 +330,83 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
759
  parse_mode=ParseMode.MARKDOWN,
760
  link_preview_options={'is_disabled': True}
761
  )
762
- else:
763
- if not message_text.startswith('/'):
764
- await update.message.reply_text("Please send me a valid URL (starting with http:// or https://) to summarize.")
765
-
766
 
767
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
768
- """Handles button presses: gets data, clears context, schedules background task."""
769
  query = update.callback_query
770
  if not query or not query.from_user or not query.message:
771
- logger.warning("Callback query, user, or message missing in update.")
772
- # Try answering query even if message is missing, just to acknowledge
773
- if query:
774
- try: await query.answer("Error: Missing information.")
775
- except Exception: pass
776
  return
777
- user = query.from_user
778
-
779
- # We skip query.answer() here to avoid potential connection issues.
780
- # The button might stay loading visually for the user.
781
 
 
 
 
782
  summary_type = query.data
783
  url = context.user_data.get('url_to_summarize')
784
- query_id = query.id # For logging
785
 
786
- logger.info(f"User {user.id} chose summary type '{summary_type}'. Query ID: {query_id}. Checking for stored URL.")
787
 
788
  if not url:
789
- logger.warning(f"User {user.id} (Query {query_id}) pressed button '{summary_type}', but NO URL found in user_data context.")
790
  try:
791
  await query.edit_message_text(text="⚠️ Oops! I lost the context for that link. Please send the link again.")
792
- except Exception as edit_err:
793
- logger.error(f"Failed to edit message for lost context (Query {query_id}): {edit_err}")
794
- # Still try to answer the query if editing failed
795
- try: await query.answer("Error processing request.")
796
- except Exception: pass
797
  return
798
 
799
- logger.info(f"Scheduling background task for URL '{url}' (User {user.id}, Query {query_id}, Type '{summary_type}').")
800
-
801
- # Extract necessary IDs before clearing data
802
- user_id = user.id
803
- chat_id = query.message.chat_id
804
- message_id_to_edit = query.message.message_id
805
- bot_instance = context.bot # Get the bot instance from context
806
-
807
- # Clear the URL from context *before* scheduling the task
808
  context.user_data.pop('url_to_summarize', None)
809
- logger.debug(f"Cleared URL from user_data for user {user_id} (Query {query_id})")
810
 
811
- # Schedule the actual processing function to run in the background
812
- # Pass all required data explicitly
813
  asyncio.create_task(
814
  process_summary_task(
815
- user_id=user_id,
816
- chat_id=chat_id,
817
- message_id_to_edit=message_id_to_edit,
818
  url=url,
819
  summary_type=summary_type,
820
- bot=bot_instance
821
  ),
822
- name=f"SummaryTask-{user_id}-{message_id_to_edit}" # Optional: name the task
823
  )
824
 
825
- # Log that the task was scheduled and the handler is returning.
826
- logger.debug(f"Callback handler for Query {query_id} finished after scheduling task.")
827
- # DO NOT await the task here. Let the handler return immediately.
828
-
829
-
830
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
831
  """Log Errors caused by Updates or background tasks."""
832
- # Check if the error is from an Exception raised in a handler
833
  if context.error:
834
  logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
835
- if isinstance(context.error, TimedOut):
836
- logger.warning("A timeout error occurred in PTB communication.")
837
- elif isinstance(context.error, NetworkError):
838
- logger.warning(f"A network error occurred: {context.error}")
839
- # Add more specific error handling if needed
840
- else:
841
- # Log errors from background tasks if PTB captures them this way (might need custom handling)
842
- logger.error(f"Unknown error occurred. Update: {update} | Context: {context}")
843
-
844
 
845
  # --- Bot Setup Function ---
846
  async def setup_bot_config() -> Application:
847
- """Configures the PTB Application with custom HTTPX settings for PTB v20.x."""
848
  logger.info("Configuring Telegram Application...")
849
  if not TELEGRAM_TOKEN:
850
- logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
851
  raise ValueError("TELEGRAM_TOKEN environment variable not set.")
852
 
853
- connect_timeout = 10.0
854
- read_timeout = 30.0
855
- write_timeout = 30.0
856
- pool_timeout = 30.0
857
-
858
- logger.info(f"Creating PTB HTTPXRequest (v20 compatible) with settings: "
859
- f"connect_timeout={connect_timeout}, read_timeout={read_timeout}, "
860
- f"write_timeout={write_timeout}, pool_timeout={pool_timeout}. "
861
- f"(Pool size uses httpx default)")
862
-
863
  custom_request = HTTPXRequest(
864
- connect_timeout=connect_timeout,
865
- read_timeout=read_timeout,
866
- write_timeout=write_timeout,
867
- pool_timeout=pool_timeout,
868
  http_version="1.1"
869
  )
870
 
871
- application_builder = Application.builder().token(TELEGRAM_TOKEN)
872
- application_builder.request(custom_request)
873
- application = application_builder.build()
 
 
 
874
 
875
  application.add_handler(CommandHandler("start", start))
876
  application.add_handler(CommandHandler("help", help_command))
877
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
878
- application.add_handler(CallbackQueryHandler(handle_summary_type_callback)) # This handler now just schedules the task
879
  application.add_error_handler(error_handler)
880
 
881
  logger.info("Telegram application handlers configured.")
@@ -890,140 +421,78 @@ async def lifespan(app: Starlette):
890
 
891
  try:
892
  ptb_app = await setup_bot_config()
893
- logger.info("PTB Application object configured. Initializing...")
894
  await ptb_app.initialize()
895
- logger.info("PTB Application initialized. Starting background tasks (e.g., job queue)...")
896
- await ptb_app.start() # Starts dispatcher, job queue, etc. but NOT polling
897
 
898
- bot_instance = ptb_app.bot
899
- bot_info = await bot_instance.get_me()
900
- logger.info(f"PTB Application started successfully. Bot ID: {bot_info.id}, Username: @{bot_info.username}")
901
 
902
  WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
903
  if WEBHOOK_URL_BASE:
904
- if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
 
905
  webhook_path = "/webhook"
906
  full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
907
 
908
- logger.info(f"Attempting to set Telegram webhook to: {full_webhook_url}")
909
- await asyncio.sleep(2.0) # Short delay before setting webhook
910
  try:
911
- await bot_instance.set_webhook(
912
  url=full_webhook_url,
913
  allowed_updates=Update.ALL_TYPES,
914
- # drop_pending_updates=True # Consider adding this if startup issues persist
915
  )
916
- webhook_info = await bot_instance.get_webhook_info()
917
- if webhook_info and webhook_info.url == full_webhook_url:
918
- logger.info(f"Telegram webhook set successfully! Current info: {webhook_info}")
919
- elif webhook_info:
920
- logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got: {webhook_info.url}. Info: {webhook_info}")
921
- else:
922
- logger.error("Failed to get webhook info after setting webhook.")
923
- except RetryAfter as e:
924
- logger.warning(f"Webhook setting throttled by Telegram (RetryAfter: {e.retry_after}s). Another instance likely succeeded or try again later.")
925
- await asyncio.sleep(e.retry_after or 2)
926
- try:
927
- webhook_info = await bot_instance.get_webhook_info()
928
- logger.info(f"Webhook info after RetryAfter delay: {webhook_info}")
929
- except Exception as get_info_err:
930
- logger.error(f"Failed to get webhook info after RetryAfter delay: {get_info_err}")
931
  except Exception as e:
932
- logger.error(f"Failed to set Telegram webhook to {full_webhook_url}: {e}", exc_info=True)
933
- else:
934
- logger.warning("SPACE_HOST environment variable not found. Cannot set webhook automatically. Bot will not receive updates via webhook.")
935
 
936
- logger.info("ASGI Lifespan: Startup complete. Application is ready to yield.")
937
- yield # --- Application runs here ---
938
 
939
  except Exception as startup_err:
940
- logger.critical(f"CRITICAL ERROR during ASGI application startup: {startup_err}", exc_info=True)
941
- # Log traceback explicitly before raising might help in some environments
942
- traceback.print_exc()
943
  raise
944
  finally:
945
- # --- Shutdown Sequence ---
946
  logger.info("ASGI Lifespan: Shutdown sequence initiated...")
947
  if ptb_app:
948
- bot_username = ptb_app.bot.username if ptb_app.bot and ptb_app.bot.username else "N/A"
949
- logger.info(f"PTB App instance found for @{bot_username}. Checking if running...")
950
- is_running = getattr(ptb_app, '_running', False)
951
- if is_running:
952
- try:
953
- logger.info("Stopping PTB Application's background tasks...")
954
- await ptb_app.stop()
955
- logger.info("Shutting down PTB Application connections and resources...")
956
- await ptb_app.shutdown()
957
- logger.info("PTB Application shut down gracefully.")
958
- except Exception as shutdown_err:
959
- logger.error(f"Error during PTB Application shutdown: {shutdown_err}", exc_info=True)
960
- else:
961
- logger.warning("PTB Application instance exists but was not marked as running at shutdown.")
962
- try:
963
- await ptb_app.shutdown()
964
- logger.info("Attempted shutdown of non-running PTB app completed.")
965
- except Exception as shutdown_err:
966
- logger.error(f"Error during shutdown of non-running PTB app: {shutdown_err}", exc_info=True)
967
- else:
968
- logger.warning("No PTB Application instance (ptb_app) found during ASGI shutdown.")
969
  logger.info("ASGI Lifespan: Shutdown complete.")
970
 
971
-
972
- # --- Flask App Setup (for Webhook Route) ---
973
  flask_core_app = Flask(__name__)
974
- logger.info("Core Flask app instance created (used by Starlette for routing).")
975
 
976
# --- Define Flask Routes ---
@flask_core_app.route('/')
def index():
    """Health check: report whether the Telegram bot application is running."""
    logger.debug("Health check endpoint '/' accessed.")
    if ptb_app and ptb_app.bot:
        # Reads PTB's `_running` flag; a missing attribute counts as "not running".
        running = getattr(ptb_app, '_running', False)
        handle = ptb_app.bot.username
        if running:
            bot_status = f"Running (@{handle})"
        else:
            bot_status = f"Initialized/Stopped (@{handle})"
    else:
        bot_status = "Unknown / Not Initialized"
    return f"Telegram Bot Summarizer - Status: {bot_status} - Listening via Starlette/Uvicorn."
986
 
987
  @flask_core_app.route('/webhook', methods=['POST'])
988
  async def webhook() -> Response:
989
  """Webhook endpoint called by Telegram."""
990
- global ptb_app
991
-
992
  if not ptb_app:
993
- logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None. Lifespan likely failed.")
994
- return Response('Bot service is not configured or failed during startup.', status=503)
995
 
996
- is_running = getattr(ptb_app, '_running', False)
997
- if not is_running:
998
- logger.error("Webhook triggered, but PTB Application is not currently running.")
999
- return Response('Bot service is initialized but not actively running.', status=503)
1000
-
1001
- logger.debug("Webhook endpoint received POST request from Telegram.")
1002
  try:
1003
  update_data = request.get_json()
1004
  if not update_data:
1005
- logger.warning("Received empty or non-JSON data on webhook.")
1006
- return Response('Bad Request: Expected JSON payload.', status=400)
1007
 
1008
  update = Update.de_json(update_data, ptb_app.bot)
1009
- logger.debug(f"Processing update_id: {update.update_id} via webhook route.")
1010
-
1011
- # Let PTB's dispatcher handle the update asynchronously
1012
- # This will now call the appropriate handler (e.g., handle_summary_type_callback)
1013
- # which will *quickly* schedule the background task and return.
1014
  await ptb_app.process_update(update)
1015
-
1016
- logger.debug(f"Finished processing update_id: {update.update_id} in webhook handler (task scheduled).")
1017
- # Return 200 OK immediately to Telegram
1018
  return Response('ok', status=200)
1019
 
1020
- except json.JSONDecodeError:
1021
- logger.error("Failed to decode JSON from Telegram webhook request.", exc_info=True)
1022
- return Response('Bad Request: Invalid JSON format.', status=400)
1023
  except Exception as e:
1024
- logger.error(f"Error processing update in webhook handler: {e}", exc_info=True)
1025
- return Response('Internal Server Error processing update.', status=500)
1026
-
1027
 
1028
  # --- Create Starlette ASGI Application ---
1029
  app = Starlette(
@@ -1033,25 +502,10 @@ app = Starlette(
1033
  Mount("/", app=WSGIMiddleware(flask_core_app))
1034
  ]
1035
  )
1036
- logger.info("Starlette ASGI application created, configured with lifespan and Flask app mounted at '/'.")
1037
-
1038
 
1039
  # --- Development Server Execution Block ---
1040
  if __name__ == '__main__':
1041
- logger.warning("=" * 50)
1042
- logger.warning(" RUNNING SCRIPT DIRECTLY (using __main__) ".center(50, "="))
1043
- logger.warning("=" * 50)
1044
- logger.warning("This mode starts the Flask development server.")
1045
- logger.warning("!!! IT DOES **NOT** RUN THE ASGI LIFESPAN !!!")
1046
- logger.warning("!!! The Telegram Bot (PTB Application) WILL NOT INITIALIZE OR RUN !!!")
1047
- logger.warning("This is suitable ONLY for verifying Flask routes locally.")
1048
- logger.warning("For proper testing/deployment, use: uvicorn main:app --reload --port 8080")
1049
- logger.warning("or via Gunicorn: gunicorn -c gunicorn.conf.py main:app")
1050
- logger.warning("=" * 50)
1051
-
1052
- if not TELEGRAM_TOKEN:
1053
- logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable missing. Aborting direct Flask start.")
1054
- else:
1055
- local_port = int(os.environ.get('PORT', 8080))
1056
- logger.info(f"Starting Flask development server on http://0.0.0.0:{local_port}")
1057
- flask_core_app.run(host='0.0.0.0', port=local_port, debug=True, use_reloader=False)
 
1
+ # main.py (Revised with background task connection fixes)
2
  import os
3
  import re
4
  import logging
5
  import asyncio
6
  import json
7
+ import html
8
+ import contextlib
9
+ import traceback
10
+ from typing import Optional
11
 
12
  # --- Frameworks ---
13
+ from flask import Flask, request, Response
14
+ from starlette.applications import Starlette
15
+ from starlette.routing import Mount
16
+ from starlette.middleware.wsgi import WSGIMiddleware
17
 
18
  # --- Telegram Bot ---
19
+ from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot
20
  from telegram.ext import (
21
  Application,
22
  CommandHandler,
 
26
  CallbackQueryHandler,
27
  )
28
  from telegram.constants import ParseMode
29
+ from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest
30
  from telegram.request import HTTPXRequest
31
 
32
  # --- Other Libraries ---
 
34
  from youtube_transcript_api import YouTubeTranscriptApi
35
  import requests
36
  from bs4 import BeautifulSoup
37
+ from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
38
+
39
  _apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
40
  if _apify_token_exists:
41
  from apify_client import ApifyClient
 
58
  logger = logging.getLogger(__name__)
59
  logger.info("Logging configured.")
60
 
61
+ # --- Global variable for PTB app ---
62
+ ptb_app: Optional[Application] = None
63
 
64
  # --- Environment Variable Loading ---
65
  logger.info("Attempting to load secrets...")
 
76
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
77
  logger.info("Secret loading attempt finished.")
78
 
79
# --- Retry Decorator for Bot Operations ---
def retry_bot_operation(func):
    """Wrap an async bot call so NetworkError/RuntimeError failures are retried.

    The call is attempted up to 3 times with exponential backoff (1-10 seconds).
    A warning is logged before each retry. An error is logged once, after the
    call has finally failed, and the exception is then re-raised to the caller.

    Args:
        func: Coroutine function performing the bot operation.

    Returns:
        A coroutine function with the same call signature as ``func``.
    """
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((NetworkError, RuntimeError)),
        before_sleep=lambda retry_state: logger.warning(
            f"Retrying bot operation due to {retry_state.outcome.exception()}. "
            f"Attempt {retry_state.attempt_number}/3"
        )
    )
    async def attempt(*args, **kwargs):
        # Only the bare call is retried; failure logging lives outside the retry loop.
        return await func(*args, **kwargs)

    async def wrapper(*args, **kwargs):
        try:
            return await attempt(*args, **kwargs)
        except Exception as e:
            # Reached once: after retries are exhausted or on a non-retryable error.
            logger.error(f"Operation failed after retries: {e}")
            raise
    return wrapper
97
 
98
+ # --- Helper Functions (unchanged from your original) ---
 
 
 
 
 
 
99
  def is_youtube_url(url):
100
  """Checks if the URL is a valid YouTube video or shorts URL."""
101
  youtube_regex = r'(https?://)?(www\.)?(youtube\.com/(watch\?v=|shorts/)|youtu\.be/)([\w-]{11})'
 
115
  logger.warning(f"Could not extract YouTube ID from URL: {url}")
116
  return None
117
 
118
+ # --- Content Fetching Functions (unchanged from your original) ---
119
+ # FIXME: the content fetching functions listed below are still called by the code that follows but are no longer defined in this revision; restore them unchanged from the previous version:
120
+ # get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript,
121
+ # get_website_content_via_requests, get_website_content_via_urltotext_api, generate_summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
+ # --- Revised Background Task Processing ---
124
  async def process_summary_task(
125
  user_id: int,
126
  chat_id: int,
127
  message_id_to_edit: int,
128
  url: str,
129
  summary_type: str,
130
+ bot_token: str # Now receiving token instead of bot instance
131
  ) -> None:
132
  """Handles the actual fetching and summarization in a background task."""
133
+ task_id = f"{user_id}-{message_id_to_edit}"
134
+ logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
135
+
136
+ # Create a new bot instance for this task
137
+ bot = Bot(token=bot_token)
138
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  try:
140
+ # --- Inform User Processing Has Started ---
141
+ processing_message_text = f"⏳ Working on your '{summary_type}' summary for the link...\n_(This might take up to a minute depending on the content)_"
142
+ status_message_sent_id = None
143
+
144
+ @retry_bot_operation
145
+ async def edit_or_send_status():
146
+ nonlocal status_message_sent_id, message_id_to_edit
147
+ try:
148
+ await bot.edit_message_text(
149
+ chat_id=chat_id,
150
+ message_id=message_id_to_edit,
151
+ text=processing_message_text
152
+ )
153
+ logger.debug(f"[Task {task_id}] Successfully edited message {message_id_to_edit}")
154
+ except (TimedOut, NetworkError, BadRequest) as e:
155
+ logger.warning(f"[Task {task_id}] Could not edit original message: {e}. Sending new status message.")
156
+ message_id_to_edit = None
157
+ status_message = await bot.send_message(
158
+ chat_id=chat_id,
159
+ text=processing_message_text
160
+ )
161
+ status_message_sent_id = status_message.message_id
162
+ logger.debug(f"[Task {task_id}] Sent new status message {status_message_sent_id}")
 
 
 
163
 
164
+ await edit_or_send_status()
 
 
 
 
 
 
165
 
166
+ # --- Main Content Fetching and Summarization ---
167
+ content = None
168
+ user_feedback_message = None
169
+ success = False
170
 
171
+ try:
172
+ # Send 'typing' action
173
+ @retry_bot_operation
174
+ async def send_typing():
175
+ await bot.send_chat_action(chat_id=chat_id, action='typing')
176
+
177
+ await send_typing()
178
+
179
+ # --- Determine Content Type and Fetch ---
180
+ is_yt = is_youtube_url(url)
181
+ logger.debug(f"[Task {task_id}] URL is YouTube: {is_yt}")
182
+
183
+ if is_yt:
184
+ video_id = extract_youtube_id(url)
185
+ if video_id:
186
+ logger.info(f"[Task {task_id}] Fetching YouTube transcript for {video_id}")
187
+ content = await get_youtube_transcript(
188
+ video_id,
189
+ url,
190
+ SUPADATA_API_KEY,
191
+ APIFY_API_TOKEN
192
+ )
193
+ if not content:
194
+ user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video."
195
  else:
196
+ logger.info(f"[Task {task_id}] Attempting website scrape for: {url}")
197
+ content = await get_website_content_via_requests(url)
198
+ if not content and URLTOTEXT_API_KEY:
199
+ await send_typing()
200
+ content = await get_website_content_via_urltotext_api(url, URLTOTEXT_API_KEY)
201
+ if not content:
202
+ user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website."
203
+
204
+ # --- Generate Summary if Content Was Fetched ---
205
  if content:
206
+ logger.info(f"[Task {task_id}] Generating '{summary_type}' summary")
207
+ await send_typing()
208
+
209
+ summary = await generate_summary(content, summary_type, OPENROUTER_API_KEY)
 
 
210
 
211
+ if summary.startswith("Error:") or summary.startswith("Sorry,"):
212
+ user_feedback_message = f"⚠️ {summary}"
 
 
 
 
 
213
  else:
214
+ @retry_bot_operation
215
+ async def send_summary():
216
+ await bot.send_message(
217
+ chat_id=chat_id,
218
+ text=summary,
219
+ parse_mode=ParseMode.MARKDOWN,
220
+ link_preview_options={'is_disabled': True}
221
+ )
222
+
223
+ await send_summary()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  success = True
 
 
 
 
 
 
225
 
226
+ except Exception as e:
227
+ logger.error(f"[Task {task_id}] Error during processing: {e}", exc_info=True)
228
+ user_feedback_message = " An unexpected error occurred while processing your request."
229
 
230
  # --- Send Final Feedback Message if Processing Failed ---
231
  if user_feedback_message and not success:
232
+ @retry_bot_operation
233
+ async def send_feedback():
234
  await bot.send_message(chat_id=chat_id, text=user_feedback_message)
235
+
236
+ await send_feedback()
237
 
238
  except Exception as e:
239
+ logger.error(f"[Task {task_id}] Critical error in task: {e}", exc_info=True)
 
240
  try:
241
+ await bot.send_message(
242
+ chat_id=chat_id,
243
+ text=" A critical error occurred. Please try again later."
244
+ )
245
+ except Exception:
246
+ pass
247
  finally:
248
  # --- Clean up Status Message(s) ---
 
249
  try:
250
  if status_message_sent_id:
 
251
  await bot.delete_message(chat_id=chat_id, message_id=status_message_sent_id)
252
+ elif message_id_to_edit and success:
253
+ await bot.delete_message(chat_id=chat_id, message_id=message_id_to_edit)
254
+ elif message_id_to_edit and not success:
255
+ final_error_text = user_feedback_message or "❌ An error occurred."
256
+ await bot.edit_message_text(
257
+ chat_id=chat_id,
258
+ message_id=message_id_to_edit,
259
+ text=final_error_text[:4090]
260
+ )
261
+ except Exception as e:
262
+ logger.warning(f"[Task {task_id}] Cleanup error: {e}")
263
+
264
+ # Ensure bot session is closed
265
+ try:
266
+ await bot.session.close()
267
+ except Exception:
268
+ pass
 
 
269
 
270
+ logger.info(f"[Task {task_id}] Task completed. Success: {success}")
271
 
272
  # --- Telegram Bot Handlers ---
 
273
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
274
  """Handles the /start command."""
275
  user = update.effective_user
276
  if not user: return
277
+ logger.info(f"User {user.id} initiated /start.")
278
  mention = user.mention_html() if user.username else user.first_name
279
  start_message = (
280
  f"👋 Hello {mention}!\n\n"
 
290
  logger.info(f"User {user.id if user else '?'} requested /help.")
291
  help_text = (
292
  "**How to Use Me:**\n"
293
+ "1. Send me a direct link (URL) to a YouTube video or a web article.\n"
294
+ "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n"
295
+ "3. Click the button for your preferred format.\n"
296
+ "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
297
  "**Important Notes:**\n"
298
+ "- **YouTube:** Getting transcripts can sometimes fail if they are disabled or unavailable.\n"
299
+ "- **Websites:** Complex websites might not work perfectly.\n"
300
+ "- **AI Summaries:** The AI tries its best to be accurate.\n\n"
 
301
  "Just send a link to get started!"
302
  )
303
  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
 
314
 
315
  if match:
316
  url = match.group(0)
317
+ logger.info(f"User {user.id} sent URL: {url}")
318
  context.user_data['url_to_summarize'] = url
 
319
 
320
  keyboard = [
321
  [
 
330
  parse_mode=ParseMode.MARKDOWN,
331
  link_preview_options={'is_disabled': True}
332
  )
333
+ elif not message_text.startswith('/'):
334
+ await update.message.reply_text("Please send me a valid URL (starting with http:// or https://) to summarize.")
 
 
335
 
336
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
337
+ """Handles button presses for summary type selection."""
338
  query = update.callback_query
339
  if not query or not query.from_user or not query.message:
340
+ try:
341
+ await query.answer()
342
+ except:
343
+ pass
 
344
  return
 
 
 
 
345
 
346
+ await query.answer() # Acknowledge the button press immediately
347
+
348
+ user = query.from_user
349
  summary_type = query.data
350
  url = context.user_data.get('url_to_summarize')
351
+ query_id = query.id
352
 
353
+ logger.info(f"User {user.id} chose summary type '{summary_type}'")
354
 
355
  if not url:
356
+ logger.warning(f"No URL found for user {user.id}")
357
  try:
358
  await query.edit_message_text(text="⚠️ Oops! I lost the context for that link. Please send the link again.")
359
+ except Exception as e:
360
+ logger.error(f"Failed to edit message: {e}")
 
 
 
361
  return
362
 
363
+ # Clear the URL from context
 
 
 
 
 
 
 
 
364
  context.user_data.pop('url_to_summarize', None)
 
365
 
366
+ # Schedule background task with token instead of bot instance
 
367
  asyncio.create_task(
368
  process_summary_task(
369
+ user_id=user.id,
370
+ chat_id=query.message.chat_id,
371
+ message_id_to_edit=query.message.message_id,
372
  url=url,
373
  summary_type=summary_type,
374
+ bot_token=TELEGRAM_TOKEN
375
  ),
376
+ name=f"SummaryTask-{user.id}-{query.message.message_id}"
377
  )
378
 
 
 
 
 
 
379
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
380
  """Log Errors caused by Updates or background tasks."""
 
381
  if context.error:
382
  logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
 
 
 
 
 
 
 
 
 
383
 
384
  # --- Bot Setup Function ---
385
  async def setup_bot_config() -> Application:
386
+ """Configures the PTB Application."""
387
  logger.info("Configuring Telegram Application...")
388
  if not TELEGRAM_TOKEN:
 
389
  raise ValueError("TELEGRAM_TOKEN environment variable not set.")
390
 
 
 
 
 
 
 
 
 
 
 
391
  custom_request = HTTPXRequest(
392
+ connect_timeout=10.0,
393
+ read_timeout=30.0,
394
+ write_timeout=30.0,
395
+ pool_timeout=30.0,
396
  http_version="1.1"
397
  )
398
 
399
+ application = (
400
+ Application.builder()
401
+ .token(TELEGRAM_TOKEN)
402
+ .request(custom_request)
403
+ .build()
404
+ )
405
 
406
  application.add_handler(CommandHandler("start", start))
407
  application.add_handler(CommandHandler("help", help_command))
408
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
409
+ application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
410
  application.add_error_handler(error_handler)
411
 
412
  logger.info("Telegram application handlers configured.")
 
421
 
422
  try:
423
  ptb_app = await setup_bot_config()
 
424
  await ptb_app.initialize()
425
+ await ptb_app.start()
 
426
 
427
+ bot_info = await ptb_app.bot.get_me()
428
+ logger.info(f"Bot started: @{bot_info.username}")
 
429
 
430
  WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
431
  if WEBHOOK_URL_BASE:
432
+ if not WEBHOOK_URL_BASE.startswith("https://"):
433
+ WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
434
  webhook_path = "/webhook"
435
  full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
436
 
437
+ logger.info(f"Setting webhook to: {full_webhook_url}")
438
+ await asyncio.sleep(2.0)
439
  try:
440
+ await ptb_app.bot.set_webhook(
441
  url=full_webhook_url,
442
  allowed_updates=Update.ALL_TYPES,
443
+ drop_pending_updates=True
444
  )
445
+ webhook_info = await ptb_app.bot.get_webhook_info()
446
+ logger.info(f"Webhook set: {webhook_info}")
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  except Exception as e:
448
+ logger.error(f"Failed to set webhook: {e}")
 
 
449
 
450
+ logger.info("ASGI Lifespan: Startup complete.")
451
+ yield
452
 
453
  except Exception as startup_err:
454
+ logger.critical(f"Startup error: {startup_err}", exc_info=True)
 
 
455
  raise
456
  finally:
 
457
  logger.info("ASGI Lifespan: Shutdown sequence initiated...")
458
  if ptb_app:
459
+ try:
460
+ await ptb_app.stop()
461
+ await ptb_app.shutdown()
462
+ logger.info("PTB Application shut down gracefully.")
463
+ except Exception as shutdown_err:
464
+ logger.error(f"Shutdown error: {shutdown_err}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  logger.info("ASGI Lifespan: Shutdown complete.")
466
 
467
+ # --- Flask App Setup ---
 
468
  flask_core_app = Flask(__name__)
 
469
 
 
470
  @flask_core_app.route('/')
471
  def index():
472
  """Basic health check endpoint."""
473
+ bot_status = "Unknown"
 
474
  if ptb_app and ptb_app.bot:
475
+ bot_status = f"Running (@{ptb_app.bot.username})"
476
+ return f"Telegram Bot Summarizer - Status: {bot_status}"
 
477
 
478
  @flask_core_app.route('/webhook', methods=['POST'])
479
  async def webhook() -> Response:
480
  """Webhook endpoint called by Telegram."""
 
 
481
  if not ptb_app:
482
+ return Response('Bot not initialized', status=503)
 
483
 
 
 
 
 
 
 
484
  try:
485
  update_data = request.get_json()
486
  if not update_data:
487
+ return Response('Bad Request', status=400)
 
488
 
489
  update = Update.de_json(update_data, ptb_app.bot)
 
 
 
 
 
490
  await ptb_app.process_update(update)
 
 
 
491
  return Response('ok', status=200)
492
 
 
 
 
493
  except Exception as e:
494
+ logger.error(f"Webhook error: {e}")
495
+ return Response('Internal Server Error', status=500)
 
496
 
497
  # --- Create Starlette ASGI Application ---
498
  app = Starlette(
 
502
  Mount("/", app=WSGIMiddleware(flask_core_app))
503
  ]
504
  )
505
+ logger.info("Starlette ASGI application created.")
 
506
 
507
  # --- Development Server Execution Block ---
508
  if __name__ == '__main__':
509
+ logger.warning("Running in development mode (Flask server only)")
510
+ local_port = int(os.environ.get('PORT', 8080))
511
+ flask_core_app.run(host='0.0.0.0', port=local_port, debug=True, use_reloader=False)