Update main.py
Browse files
main.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# main.py (Revised:
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import logging
|
|
@@ -24,11 +24,11 @@ from telegram.ext import (
|
|
| 24 |
CallbackQueryHandler,
|
| 25 |
)
|
| 26 |
from telegram.constants import ParseMode
|
| 27 |
-
from telegram.error import NetworkError, RetryAfter # Import
|
| 28 |
from telegram.request import HTTPXRequest # Import the request class
|
| 29 |
|
| 30 |
# --- Other Libraries ---
|
| 31 |
-
|
| 32 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 33 |
import requests
|
| 34 |
from bs4 import BeautifulSoup
|
|
@@ -43,6 +43,7 @@ logging.basicConfig(
|
|
| 43 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 44 |
level=logging.DEBUG
|
| 45 |
)
|
|
|
|
| 46 |
logging.getLogger("httpx").setLevel(logging.WARNING)
|
| 47 |
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
|
| 48 |
logging.getLogger("telegram.ext").setLevel(logging.INFO)
|
|
@@ -51,6 +52,7 @@ logging.getLogger("urllib3").setLevel(logging.INFO)
|
|
| 51 |
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
|
| 52 |
logging.getLogger('uvicorn').setLevel(logging.INFO)
|
| 53 |
logging.getLogger('starlette').setLevel(logging.INFO)
|
|
|
|
| 54 |
logger = logging.getLogger(__name__)
|
| 55 |
logger.info("Logging configured.")
|
| 56 |
|
|
@@ -60,7 +62,7 @@ ptb_app: Application | None = None
|
|
| 60 |
# --- Environment Variable Loading ---
|
| 61 |
logger.info("Attempting to load secrets...")
|
| 62 |
def get_secret(secret_name):
|
| 63 |
-
logger.debug(f"Attempting to read secret: {secret_name}")
|
| 64 |
value = os.environ.get(secret_name)
|
| 65 |
if value: logger.info(f"Secret '{secret_name}': Found (Value length: {len(value)})")
|
| 66 |
else: logger.warning(f"Secret '{secret_name}': Not Found")
|
|
@@ -79,7 +81,6 @@ logger.info("Secret loading attempt finished.")
|
|
| 79 |
# get_transcript_via_supadata, get_transcript_via_apify,
|
| 80 |
# get_youtube_transcript, get_website_content_via_requests,
|
| 81 |
# get_website_content_via_urltotext_api, generate_summary)
|
| 82 |
-
# Ensure the generate_summary has the updated prompts from previous response
|
| 83 |
|
| 84 |
# Helper Functions
|
| 85 |
def is_youtube_url(url):
|
|
@@ -111,7 +112,8 @@ async def get_transcript_via_supadata(video_id: str, api_key: str):
|
|
| 111 |
params = {"videoId": video_id, "format": "text"}
|
| 112 |
headers = {"X-API-Key": api_key}
|
| 113 |
try:
|
| 114 |
-
|
|
|
|
| 115 |
response = await asyncio.to_thread(requests.get, api_endpoint, headers=headers, params=params, timeout=30, verify=False)
|
| 116 |
logger.debug(f"[Supadata] Received status code {response.status_code} for {video_id}")
|
| 117 |
if response.status_code == 200:
|
|
@@ -253,268 +255,625 @@ async def get_youtube_transcript(video_id: str, video_url: str, supadata_key: st
|
|
| 253 |
except Exception as e:
|
| 254 |
logger.warning(f"[Primary YT] Error via library: {type(e).__name__} - {e}")
|
| 255 |
if "YouTube is blocking requests" in str(e) or "HTTP Error 429" in str(e): logger.warning("[Primary YT] IP likely blocked.")
|
| 256 |
-
elif "No transcript found" in str(e): logger.warning("[Primary YT] No transcript in languages.")
|
| 257 |
-
elif "TranscriptsDisabled" in str(e) or "disabled" in str(e): logger.warning("[Primary YT] Transcripts disabled.")
|
| 258 |
-
transcript_text = None
|
| 259 |
|
| 260 |
if transcript_text is None: # Fallback 1: Supadata
|
| 261 |
logger.info("[Fallback YT 1] Trying Supadata API...")
|
| 262 |
if supadata_key:
|
| 263 |
transcript_text = await get_transcript_via_supadata(video_id, supadata_key)
|
| 264 |
if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata. Length: {len(transcript_text)}"); return transcript_text
|
| 265 |
-
else: logger.warning("[Fallback YT 1] Supadata failed or no content.")
|
| 266 |
-
else: logger.warning("[Fallback YT 1] Supadata key not available.")
|
| 267 |
|
| 268 |
if transcript_text is None: # Fallback 2: Apify
|
| 269 |
logger.info("[Fallback YT 2] Trying Apify API...")
|
| 270 |
if apify_token:
|
| 271 |
transcript_text = await get_transcript_via_apify(video_url, apify_token)
|
| 272 |
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify. Length: {len(transcript_text)}"); return transcript_text
|
| 273 |
-
else: logger.warning("[Fallback YT 2] Apify failed or no content.")
|
| 274 |
-
else: logger.warning("[Fallback YT 2] Apify token not available.")
|
| 275 |
|
| 276 |
-
if transcript_text is None: logger.error(f"All methods failed for video ID: {video_id}")
|
| 277 |
return transcript_text
|
| 278 |
|
| 279 |
# Website Content via Requests/BS4
|
| 280 |
async def get_website_content_via_requests(url):
|
| 281 |
"""Attempts to scrape website content using requests/BeautifulSoup."""
|
| 282 |
-
if not url: logger.error("[Web Scraper - Requests/BS4] no URL"); return None
|
| 283 |
-
logger.info(f"[Web Scraper - Requests/BS4]
|
| 284 |
try:
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=25, allow_redirects=True)
|
| 287 |
-
response.raise_for_status()
|
| 288 |
logger.debug(f"[Web Scraper - Requests/BS4] Status {response.status_code} for {url}")
|
|
|
|
| 289 |
content_type = response.headers.get('content-type', '').lower()
|
| 290 |
if 'html' not in content_type:
|
| 291 |
-
logger.warning(f"[Web Scraper - Requests/BS4] Non-HTML: {content_type}.
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
return None
|
|
|
|
| 294 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
target_element = main_content if main_content else soup.body
|
| 298 |
-
if not target_element:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
|
| 300 |
-
text = "\n".join(lines)
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
return text
|
| 305 |
-
|
| 306 |
-
except requests.exceptions.
|
| 307 |
-
except requests.exceptions.
|
| 308 |
-
except
|
|
|
|
|
|
|
| 309 |
|
| 310 |
# Website Content via URLToText API
|
| 311 |
async def get_website_content_via_urltotext_api(url: str, api_key: str):
|
| 312 |
"""Fetches website content using the URLToText API."""
|
| 313 |
-
if not url: logger.error("[Web Scraper - URLToText API] no URL"); return None
|
| 314 |
-
if not api_key: logger.error("[Web Scraper - URLToText API] API key missing."); return None
|
| 315 |
-
logger.info(f"[Web Scraper - URLToText API] Attempting fetch: {url}")
|
| 316 |
api_endpoint = "https://urltotext.com/api/v1/urltotext/"
|
| 317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
|
| 319 |
try:
|
| 320 |
-
response = await asyncio.to_thread(requests.post, api_endpoint, headers=headers, data=payload, timeout=
|
| 321 |
-
logger.debug(f"[Web Scraper - URLToText API]
|
| 322 |
if response.status_code == 200:
|
| 323 |
try:
|
| 324 |
data = response.json()
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
# DeepSeek Summary Function (with updated prompts)
|
| 343 |
async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
|
| 344 |
"""Generates summary using DeepSeek via OpenRouter API."""
|
| 345 |
logger.info(f"Generating '{summary_type}' summary. Input length: {len(text)}")
|
| 346 |
-
if not api_key: logger.error("OpenRouter API key missing."); return "Error: AI
|
| 347 |
-
if not text: logger.warning("generate_summary called with empty text."); return "Error: No content to summarize."
|
| 348 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
|
| 350 |
# --- UPDATED PROMPTS ---
|
| 351 |
if summary_type == "paragraph":
|
| 352 |
-
|
| 353 |
-
"You are an AI
|
| 354 |
-
"
|
| 355 |
-
"
|
| 356 |
-
"
|
| 357 |
-
"
|
| 358 |
-
"
|
| 359 |
-
"
|
| 360 |
-
"
|
| 361 |
-
"
|
|
|
|
| 362 |
)
|
|
|
|
|
|
|
| 363 |
elif summary_type == "points":
|
| 364 |
-
|
| 365 |
-
"You are an AI
|
| 366 |
-
"
|
| 367 |
-
"
|
| 368 |
-
"
|
| 369 |
-
"
|
| 370 |
-
"
|
| 371 |
-
"
|
| 372 |
-
"
|
| 373 |
-
"
|
|
|
|
|
|
|
| 374 |
)
|
|
|
|
| 375 |
else:
|
| 376 |
-
logger.error(f"Invalid summary_type '{summary_type}'.")
|
| 377 |
-
return f"Error: Invalid summary type ('{summary_type}')."
|
| 378 |
# --- END UPDATED PROMPTS ---
|
| 379 |
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
try:
|
| 390 |
-
logger.debug(f"Sending request to OpenRouter (Model: {model_name})
|
| 391 |
-
|
|
|
|
| 392 |
logger.debug(f"Received status {response.status_code} from OpenRouter.")
|
|
|
|
| 393 |
if response.status_code == 200:
|
| 394 |
try:
|
| 395 |
data = response.json()
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
else:
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
|
| 420 |
# --- Telegram Bot Handlers ---
|
| 421 |
|
| 422 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
| 426 |
mention = user.mention_html() if user.username else user.first_name
|
| 427 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
|
| 429 |
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
|
| 434 |
|
| 435 |
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
|
|
| 436 |
if not update.message or not update.message.text: return
|
| 437 |
-
message_text = update.message.text.strip()
|
| 438 |
-
|
| 439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
if match:
|
| 441 |
-
url = match.group(0)
|
| 442 |
-
|
| 443 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
reply_markup = InlineKeyboardMarkup(keyboard)
|
| 445 |
-
|
| 446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
if not url:
|
| 455 |
-
logger.warning(f"User {user.id} pressed button, NO URL in context.")
|
| 456 |
-
try:
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
if not current_openrouter_key:
|
| 464 |
-
logger.error("OpenRouter key missing.")
|
| 465 |
-
try:
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
return
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
try:
|
| 476 |
-
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
if is_yt:
|
| 479 |
video_id = extract_youtube_id(url)
|
| 480 |
if video_id:
|
| 481 |
-
logger.info(f"Fetching
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
else:
|
| 486 |
-
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
else:
|
| 489 |
-
logger.warning(f"
|
| 490 |
if current_urltotext_key:
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
if content:
|
| 496 |
-
logger.info("Content
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
summary = await generate_summary(content, summary_type, current_openrouter_key)
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
except Exception as e:
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 506 |
finally:
|
| 507 |
-
|
|
|
|
| 508 |
try:
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
|
| 513 |
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
|
| 514 |
"""Log Errors caused by Updates."""
|
| 515 |
logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
|
| 517 |
-
# --- Bot Setup Function (Modified:
|
| 518 |
async def setup_bot_config() -> Application:
|
| 519 |
"""Configures the PTB Application with custom HTTPX settings."""
|
| 520 |
logger.info("Configuring Telegram Application...")
|
|
@@ -522,44 +881,58 @@ async def setup_bot_config() -> Application:
|
|
| 522 |
logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
|
| 523 |
raise ValueError("TELEGRAM_TOKEN environment variable not set.")
|
| 524 |
|
| 525 |
-
# --- Configure HTTPX client settings
|
| 526 |
-
|
| 527 |
-
#
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
|
| 533 |
-
logger.info(f"Creating PTB HTTPXRequest with
|
| 534 |
-
f"
|
| 535 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
|
| 537 |
# Create a custom request object with these settings
|
| 538 |
-
# connection_pool_size default is 10, which is usually fine.
|
| 539 |
custom_request = HTTPXRequest(
|
| 540 |
connect_timeout=connect_timeout,
|
| 541 |
read_timeout=read_timeout,
|
| 542 |
-
write_timeout=write_timeout,
|
| 543 |
pool_timeout=pool_timeout,
|
| 544 |
-
|
|
|
|
| 545 |
)
|
| 546 |
|
| 547 |
# Use Application.builder() and pass the custom request object
|
| 548 |
application_builder = Application.builder().token(TELEGRAM_TOKEN)
|
| 549 |
application_builder.request(custom_request)
|
| 550 |
-
|
|
|
|
|
|
|
|
|
|
| 551 |
|
| 552 |
# Build the application instance
|
| 553 |
application = application_builder.build()
|
| 554 |
|
| 555 |
-
# --- Register Handlers
|
| 556 |
application.add_handler(CommandHandler("start", start))
|
| 557 |
application.add_handler(CommandHandler("help", help_command))
|
|
|
|
| 558 |
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
|
|
|
|
| 559 |
application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
|
|
|
|
| 560 |
application.add_error_handler(error_handler)
|
| 561 |
|
| 562 |
-
logger.info("Telegram handlers configured.")
|
| 563 |
return application
|
| 564 |
|
| 565 |
# --- ASGI Lifespan Context Manager ---
|
|
@@ -567,111 +940,198 @@ async def setup_bot_config() -> Application:
|
|
| 567 |
async def lifespan(app: Starlette):
|
| 568 |
"""Handles PTB startup and shutdown during ASGI lifespan."""
|
| 569 |
global ptb_app
|
| 570 |
-
logger.info("ASGI Lifespan: Startup
|
| 571 |
-
loop = asyncio.get_running_loop()
|
| 572 |
|
| 573 |
try:
|
|
|
|
| 574 |
ptb_app = await setup_bot_config()
|
| 575 |
-
logger.info("PTB
|
| 576 |
-
await ptb_app.initialize()
|
| 577 |
-
logger.info("PTB
|
|
|
|
| 578 |
await ptb_app.start()
|
| 579 |
-
|
|
|
|
|
|
|
|
|
|
| 580 |
|
|
|
|
|
|
|
| 581 |
WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
|
| 582 |
if WEBHOOK_URL_BASE:
|
|
|
|
| 583 |
if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
|
| 584 |
-
webhook_path = "/webhook"
|
| 585 |
full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
|
| 586 |
-
|
|
|
|
|
|
|
|
|
|
| 587 |
try:
|
| 588 |
-
|
| 589 |
-
await
|
| 590 |
-
|
| 591 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
except RetryAfter as e:
|
| 593 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
except Exception as e:
|
| 595 |
-
logger.error(f"Failed to set webhook: {e}", exc_info=True)
|
| 596 |
-
else:
|
|
|
|
| 597 |
|
| 598 |
-
logger.info("ASGI Lifespan: Startup complete. Application ready.")
|
| 599 |
-
yield # Application runs here
|
| 600 |
|
| 601 |
except Exception as startup_err:
|
| 602 |
-
logger.critical(f"CRITICAL ERROR during ASGI startup: {startup_err}", exc_info=True)
|
|
|
|
| 603 |
raise
|
| 604 |
finally:
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
| 618 |
|
| 619 |
|
| 620 |
-
# --- Flask App Setup (for
|
|
|
|
|
|
|
| 621 |
flask_core_app = Flask(__name__)
|
| 622 |
-
logger.info("Core Flask app instance created (for routing
|
| 623 |
|
| 624 |
-
# --- Define Flask Routes
|
| 625 |
@flask_core_app.route('/')
|
| 626 |
def index():
|
| 627 |
"""Basic health check endpoint."""
|
| 628 |
-
logger.debug("Health check '/' accessed.")
|
| 629 |
-
bot_status = "
|
| 630 |
-
if ptb_app
|
| 631 |
-
|
| 632 |
-
|
|
|
|
|
|
|
| 633 |
|
| 634 |
@flask_core_app.route('/webhook', methods=['POST'])
|
| 635 |
async def webhook() -> Response:
|
| 636 |
-
"""Webhook endpoint
|
|
|
|
|
|
|
| 637 |
if not ptb_app:
|
| 638 |
-
logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None.")
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
app = Starlette(
|
| 660 |
-
|
|
|
|
| 661 |
routes=[
|
|
|
|
|
|
|
| 662 |
Mount("/", app=WSGIMiddleware(flask_core_app))
|
| 663 |
]
|
| 664 |
)
|
| 665 |
-
logger.info("Starlette application created with lifespan and Flask app mounted at '/'.")
|
| 666 |
|
| 667 |
|
| 668 |
-
# ---
|
|
|
|
|
|
|
|
|
|
| 669 |
if __name__ == '__main__':
|
| 670 |
-
logger.warning("
|
| 671 |
-
logger.warning("
|
| 672 |
-
logger.warning("
|
| 673 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 674 |
else:
|
|
|
|
| 675 |
local_port = int(os.environ.get('PORT', 8080))
|
| 676 |
-
logger.info(f"Flask
|
| 677 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
# main.py (Revised: Increased Pool/Timeouts + Robust Callback Handling)
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import logging
|
|
|
|
| 24 |
CallbackQueryHandler,
|
| 25 |
)
|
| 26 |
from telegram.constants import ParseMode
|
| 27 |
+
from telegram.error import NetworkError, RetryAfter, TimedOut # Import TimedOut
|
| 28 |
from telegram.request import HTTPXRequest # Import the request class
|
| 29 |
|
| 30 |
# --- Other Libraries ---
|
| 31 |
+
import httpx # <<<--- ADDED IMPORT for httpx.Limits
|
| 32 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 33 |
import requests
|
| 34 |
from bs4 import BeautifulSoup
|
|
|
|
| 43 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 44 |
level=logging.DEBUG
|
| 45 |
)
|
| 46 |
+
# Reduce log spam from libraries
|
| 47 |
logging.getLogger("httpx").setLevel(logging.WARNING)
|
| 48 |
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
|
| 49 |
logging.getLogger("telegram.ext").setLevel(logging.INFO)
|
|
|
|
| 52 |
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
|
| 53 |
logging.getLogger('uvicorn').setLevel(logging.INFO)
|
| 54 |
logging.getLogger('starlette').setLevel(logging.INFO)
|
| 55 |
+
# Keep our app logger at DEBUG
|
| 56 |
logger = logging.getLogger(__name__)
|
| 57 |
logger.info("Logging configured.")
|
| 58 |
|
|
|
|
| 62 |
# --- Environment Variable Loading ---
|
| 63 |
logger.info("Attempting to load secrets...")
|
| 64 |
def get_secret(secret_name):
|
| 65 |
+
# logger.debug(f"Attempting to read secret: {secret_name}") # Optional: Less verbose startup
|
| 66 |
value = os.environ.get(secret_name)
|
| 67 |
if value: logger.info(f"Secret '{secret_name}': Found (Value length: {len(value)})")
|
| 68 |
else: logger.warning(f"Secret '{secret_name}': Not Found")
|
|
|
|
| 81 |
# get_transcript_via_supadata, get_transcript_via_apify,
|
| 82 |
# get_youtube_transcript, get_website_content_via_requests,
|
| 83 |
# get_website_content_via_urltotext_api, generate_summary)
|
|
|
|
| 84 |
|
| 85 |
# Helper Functions
|
| 86 |
def is_youtube_url(url):
|
|
|
|
| 112 |
params = {"videoId": video_id, "format": "text"}
|
| 113 |
headers = {"X-API-Key": api_key}
|
| 114 |
try:
|
| 115 |
+
# Consider removing verify=False if possible, or manage certificates properly
|
| 116 |
+
logger.warning("[Supadata] Making request with verify=False (Attempting to bypass SSL verification - Potential Security Risk)")
|
| 117 |
response = await asyncio.to_thread(requests.get, api_endpoint, headers=headers, params=params, timeout=30, verify=False)
|
| 118 |
logger.debug(f"[Supadata] Received status code {response.status_code} for {video_id}")
|
| 119 |
if response.status_code == 200:
|
|
|
|
| 255 |
except Exception as e:
|
| 256 |
logger.warning(f"[Primary YT] Error via library: {type(e).__name__} - {e}")
|
| 257 |
if "YouTube is blocking requests" in str(e) or "HTTP Error 429" in str(e): logger.warning("[Primary YT] IP likely blocked.")
|
| 258 |
+
elif "No transcript found" in str(e): logger.warning("[Primary YT] No transcript in specified languages.")
|
| 259 |
+
elif "TranscriptsDisabled" in str(e) or "disabled" in str(e): logger.warning("[Primary YT] Transcripts disabled for this video.")
|
| 260 |
+
transcript_text = None # Ensure it's None on error
|
| 261 |
|
| 262 |
if transcript_text is None: # Fallback 1: Supadata
|
| 263 |
logger.info("[Fallback YT 1] Trying Supadata API...")
|
| 264 |
if supadata_key:
|
| 265 |
transcript_text = await get_transcript_via_supadata(video_id, supadata_key)
|
| 266 |
if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata. Length: {len(transcript_text)}"); return transcript_text
|
| 267 |
+
else: logger.warning("[Fallback YT 1] Supadata failed or no content found.")
|
| 268 |
+
else: logger.warning("[Fallback YT 1] Supadata key not available, skipping.")
|
| 269 |
|
| 270 |
if transcript_text is None: # Fallback 2: Apify
|
| 271 |
logger.info("[Fallback YT 2] Trying Apify API...")
|
| 272 |
if apify_token:
|
| 273 |
transcript_text = await get_transcript_via_apify(video_url, apify_token)
|
| 274 |
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify. Length: {len(transcript_text)}"); return transcript_text
|
| 275 |
+
else: logger.warning("[Fallback YT 2] Apify failed or no content found.")
|
| 276 |
+
else: logger.warning("[Fallback YT 2] Apify token not available, skipping.")
|
| 277 |
|
| 278 |
+
if transcript_text is None: logger.error(f"All methods failed to fetch transcript for video ID: {video_id}")
|
| 279 |
return transcript_text
|
| 280 |
|
| 281 |
# Website Content via Requests/BS4
|
| 282 |
async def get_website_content_via_requests(url):
|
| 283 |
"""Attempts to scrape website content using requests/BeautifulSoup."""
|
| 284 |
+
if not url: logger.error("[Web Scraper - Requests/BS4] get_website_content_via_requests called with no URL"); return None
|
| 285 |
+
logger.info(f"[Web Scraper - Requests/BS4] Attempting fetch: {url}")
|
| 286 |
try:
|
| 287 |
+
# Standard headers, avoid overly aggressive scraping patterns
|
| 288 |
+
headers = {
|
| 289 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', # Updated UA
|
| 290 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
| 291 |
+
'Accept-Language': 'en-US,en;q=0.9',
|
| 292 |
+
'Connection': 'keep-alive',
|
| 293 |
+
'DNT': '1', # Do Not Track header
|
| 294 |
+
'Upgrade-Insecure-Requests': '1'
|
| 295 |
+
}
|
| 296 |
response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=25, allow_redirects=True)
|
| 297 |
+
response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
|
| 298 |
logger.debug(f"[Web Scraper - Requests/BS4] Status {response.status_code} for {url}")
|
| 299 |
+
|
| 300 |
content_type = response.headers.get('content-type', '').lower()
|
| 301 |
if 'html' not in content_type:
|
| 302 |
+
logger.warning(f"[Web Scraper - Requests/BS4] Non-HTML content type received: {content_type}. Attempting plain text extraction.")
|
| 303 |
+
# Allow plain text only if explicitly text/plain
|
| 304 |
+
if 'text/plain' in content_type and response.text:
|
| 305 |
+
logger.info(f"[Web Scraper - Requests/BS4] Extracted plain text content. Length: {len(response.text.strip())}")
|
| 306 |
+
return response.text.strip()
|
| 307 |
+
logger.warning(f"[Web Scraper - Requests/BS4] Content type '{content_type}' not suitable for parsing. Aborting.")
|
| 308 |
return None
|
| 309 |
+
|
| 310 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 311 |
+
|
| 312 |
+
# Remove common non-content tags more aggressively
|
| 313 |
+
tags_to_remove = ["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "label", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio", "picture", "source"]
|
| 314 |
+
# Also remove elements often used for ads or menus by class/id
|
| 315 |
+
selectors_to_remove = ['.ad', '#ad', '.ads', '#ads', '.advertisement', '#advertisement', '.banner', '#banner', '.menu', '#menu', '.navigation', '#navigation', '.sidebar', '#sidebar', '.social', '#social', '.share', '#share', '.related', '#related', '.comments', '#comments', '.cookie-consent', '#cookie-consent']
|
| 316 |
+
|
| 317 |
+
for tag in soup(tags_to_remove): tag.decompose()
|
| 318 |
+
for selector in selectors_to_remove:
|
| 319 |
+
for element in soup.select(selector): element.decompose()
|
| 320 |
+
|
| 321 |
+
# Try to find semantic main content areas first
|
| 322 |
+
main_content = soup.find('main') or \
|
| 323 |
+
soup.find('article') or \
|
| 324 |
+
soup.find(id='content') or \
|
| 325 |
+
soup.find(class_='content') or \
|
| 326 |
+
soup.find(id='main-content') or \
|
| 327 |
+
soup.find(class_='main-content') or \
|
| 328 |
+
soup.find(role='main')
|
| 329 |
+
|
| 330 |
target_element = main_content if main_content else soup.body
|
| 331 |
+
if not target_element:
|
| 332 |
+
logger.warning(f"[Web Scraper - Requests/BS4] Could not find a suitable target element (main, article, body) for {url}");
|
| 333 |
+
return None
|
| 334 |
+
|
| 335 |
+
# Extract text, attempting to preserve paragraphs better
|
| 336 |
lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
|
| 337 |
+
text = "\n\n".join(lines) # Join lines with double newline for paragraph separation
|
| 338 |
+
|
| 339 |
+
MIN_TEXT_LENGTH = 100 # Increased minimum length
|
| 340 |
+
if not text or len(text) < MIN_TEXT_LENGTH:
|
| 341 |
+
logger.warning(f"[Web Scraper - Requests/BS4] Extracted text is too short (<{MIN_TEXT_LENGTH} chars) after cleaning for {url}. Length: {len(text)}. Content might be JS-rendered or blocked.")
|
| 342 |
+
# Optional: Log the short text for debugging: logger.debug(f"Short text: {text[:500]}")
|
| 343 |
+
return None # Treat very short text as failure
|
| 344 |
+
|
| 345 |
+
logger.info(f"[Web Scraper - Requests/BS4] Successfully scraped and cleaned content from {url}. Final Length: {len(text)}")
|
| 346 |
return text
|
| 347 |
+
|
| 348 |
+
except requests.exceptions.Timeout: logger.error(f"[Web Scraper - Requests/BS4] Timeout error fetching {url}"); return None
|
| 349 |
+
except requests.exceptions.TooManyRedirects: logger.error(f"[Web Scraper - Requests/BS4] Too many redirects error for {url}"); return None
|
| 350 |
+
except requests.exceptions.HTTPError as e: logger.error(f"[Web Scraper - Requests/BS4] HTTP error {e.response.status_code} for {url}"); return None
|
| 351 |
+
except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - Requests/BS4] General request error for {url}: {e}"); return None
|
| 352 |
+
except Exception as e: logger.error(f"[Web Scraper - Requests/BS4] Error during parsing or processing {url}: {e}", exc_info=True); return None
|
| 353 |
|
| 354 |
# Website Content via URLToText API
|
| 355 |
async def get_website_content_via_urltotext_api(url: str, api_key: str):
|
| 356 |
"""Fetches website content using the URLToText API."""
|
| 357 |
+
if not url: logger.error("[Web Scraper - URLToText API] get_website_content_via_urltotext_api called with no URL"); return None
|
| 358 |
+
if not api_key: logger.error("[Web Scraper - URLToText API] API key is missing."); return None
|
| 359 |
+
logger.info(f"[Web Scraper - URLToText API] Attempting fetch via API: {url}")
|
| 360 |
api_endpoint = "https://urltotext.com/api/v1/urltotext/"
|
| 361 |
+
# Ensure payload includes options beneficial for scraping modern sites
|
| 362 |
+
payload = json.dumps({
|
| 363 |
+
"url": url,
|
| 364 |
+
"output_format": "text",
|
| 365 |
+
"extract_main_content": True, # Try to get just the core article/content
|
| 366 |
+
"render_javascript": True, # Crucial for JS-heavy sites
|
| 367 |
+
"residential_proxy": False, # Set to True if facing blocks, requires appropriate plan
|
| 368 |
+
"timeout_render": 20000, # Increase JS render timeout (in ms)
|
| 369 |
+
})
|
| 370 |
headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
|
| 371 |
try:
|
| 372 |
+
response = await asyncio.to_thread(requests.post, api_endpoint, headers=headers, data=payload, timeout=60) # Increased overall timeout
|
| 373 |
+
logger.debug(f"[Web Scraper - URLToText API] Received status code {response.status_code} for {url}")
|
| 374 |
if response.status_code == 200:
|
| 375 |
try:
|
| 376 |
data = response.json()
|
| 377 |
+
content_data = data.get("data", {})
|
| 378 |
+
content = content_data.get("content")
|
| 379 |
+
credits = data.get("credits_used", "N/A")
|
| 380 |
+
warning = content_data.get("warning")
|
| 381 |
+
error_msg = content_data.get("error") # Check for specific error in response data
|
| 382 |
+
|
| 383 |
+
if warning: logger.warning(f"[Web Scraper - URLToText API] API Warning for {url}: {warning}")
|
| 384 |
+
if error_msg: logger.error(f"[Web Scraper - URLToText API] API Error reported for {url}: {error_msg}"); return None # Treat API error as failure
|
| 385 |
+
|
| 386 |
+
if content and isinstance(content, str):
|
| 387 |
+
logger.info(f"[Web Scraper - URLToText API] Successfully fetched content via API. Length: {len(content.strip())}. Credits Used: {credits}");
|
| 388 |
+
return content.strip()
|
| 389 |
+
else:
|
| 390 |
+
logger.warning(f"[Web Scraper - URLToText API] API returned status 200 but content is empty or invalid for {url}. Response: {data}");
|
| 391 |
+
return None
|
| 392 |
+
except json.JSONDecodeError: logger.error(f"[Web Scraper - URLToText API] Failed to decode JSON response from API. Status: {response.status_code}. Response Text: {response.text[:500]}..."); return None
|
| 393 |
+
except Exception as e: logger.error(f"[Web Scraper - URLToText API] Error processing successful API response: {e}", exc_info=True); return None
|
| 394 |
+
elif response.status_code == 400: logger.error(f"[Web Scraper - URLToText API] Bad Request (400) to API. Check payload/URL. Response: {response.text[:200]}...")
|
| 395 |
+
elif response.status_code == 401: logger.error(f"[Web Scraper - URLToText API] Unauthorized (401). Check API Key. Response: {response.text[:200]}...")
|
| 396 |
+
elif response.status_code == 402: logger.error(f"[Web Scraper - URLToText API] Payment Required (402). Check API credits/plan. Response: {response.text[:200]}...")
|
| 397 |
+
elif response.status_code == 422: logger.warning(f"[Web Scraper - URLToText API] Unprocessable URL / Fetch Error (422) reported by API for {url}. Response: {response.text[:200]}...") # Might mean the site blocked the API
|
| 398 |
+
elif response.status_code == 429: logger.warning(f"[Web Scraper - URLToText API] Rate Limit Hit (429). Response: {response.text[:200]}...")
|
| 399 |
+
elif response.status_code >= 500: logger.error(f"[Web Scraper - URLToText API] API Server Error ({response.status_code}). Response: {response.text[:200]}...")
|
| 400 |
+
else: logger.error(f"[Web Scraper - URLToText API] Unexpected status code {response.status_code} from API. Response: {response.text[:200]}...")
|
| 401 |
+
return None # Return None for all non-200 responses after logging
|
| 402 |
+
except requests.exceptions.Timeout: logger.error(f"[Web Scraper - URLToText API] Timeout connecting to API for {url}"); return None
|
| 403 |
+
except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error connecting to API: {e}"); return None
|
| 404 |
+
except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error during API call: {e}", exc_info=True); return None
|
| 405 |
|
| 406 |
# DeepSeek Summary Function (with updated prompts)
|
| 407 |
def _summary_prompts(summary_type: str) -> tuple[str, str] | None:
    """Return (system_message, user_prompt_instruction) for a summary type.

    Supported types are 'paragraph' and 'points'; returns None for anything
    else so the caller can produce its user-facing error message.
    """
    if summary_type == "paragraph":
        system_message = (
            "You are an expert summarization AI. Your goal is to provide a concise, easy-to-understand summary of the provided text. "
            "Follow these instructions precisely:\n"
            "1. **Language and Spelling:** Use simple British English. Ensure all spellings conform to British English (e.g., 'summarise', 'centre', 'realise').\n"
            "2. **Clarity:** Write clearly so someone unfamiliar with the topic can understand.\n"
            "3. **Format:** Output a single paragraph only.\n"
            "4. **Conciseness:** The summary must be **no more than 85 words** long.\n"
            "5. **Completeness:** Cover the main points from the entire text, not just the start.\n"
            "6. **Punctuation:** Do NOT use em dashes (– or —). Use semicolons (;) if needed for complex sentence structure, but prefer simpler sentences.\n"
            "7. **Tone:** Maintain a neutral and informative tone.\n"
            "8. **Focus:** Extract factual information and key topics. Do not add opinions or information not present in the text."
        )
        user_prompt_instruction = "Summarize the following text into a single paragraph adhering strictly to the rules outlined in the system message:"
        return system_message, user_prompt_instruction
    if summary_type == "points":
        system_message = (
            "You are an expert summarization AI. Your goal is to extract the key points from the provided text and present them as a bulleted list. "
            "Follow these instructions precisely:\n"
            "1. **Language and Spelling:** Use simple British English. Ensure all spellings conform to British English (e.g., 'summarise', 'centre', 'realise').\n"
            "2. **Clarity:** Write clearly so someone unfamiliar with the topic can understand.\n"
            "3. **Format:** Output as a bulleted list. Start each point with a standard bullet character ('*' or '-'). Each point should be distinct and on a new line.\n"
            "4. **Content:** Each bullet point should represent a single key finding, main topic, or significant piece of information from the text.\n"
            "5. **Conciseness:** Keep each bullet point brief and to the point.\n"
            "6. **Completeness:** Cover the main points from the entire text, not just the start.\n"
            "7. **Punctuation:** Do NOT use em dashes (– or —) within bullet points.\n"
            "8. **Tone:** Maintain a neutral and informative tone.\n"
            "9. **Focus:** Extract factual information and key topics. Do not add opinions or information not present in the text."
        )
        user_prompt_instruction = "Summarize the following text into a bulleted list adhering strictly to the rules outlined in the system message:"
        return system_message, user_prompt_instruction
    return None


def _parse_openrouter_success(response, summary_type: str) -> str:
    """Extract the summary from a 200 OpenRouter response.

    Returns the summary text, or a user-facing "Sorry, ..." message if the
    payload is missing content or cannot be parsed.
    """
    try:
        data = response.json()
        # Defensive navigation: choices may be absent or empty.
        choice = data.get("choices", [{}])[0]
        message = choice.get("message", {})
        summary = message.get("content")
        finish_reason = choice.get("finish_reason")

        if summary and isinstance(summary, str) and summary.strip():
            summary = summary.strip()
            logger.info(f"Successfully generated summary. Finish Reason: {finish_reason}. Length: {len(summary)}")
            # Soft post-check only: log (don't reject) paragraphs that exceed
            # the 85-word target by more than a small margin.
            if summary_type == "paragraph" and len(summary.split()) > 95:
                logger.warning(f"Generated paragraph summary slightly longer than target word count ({len(summary.split())} words).")
            return summary

        logger.warning(f"OpenRouter returned status 200 but summary content is missing or empty. Response data: {data}")
        return "Sorry, the AI model returned an empty summary. The content might have been unsuitable."
    except (json.JSONDecodeError, IndexError, KeyError, AttributeError) as e:
        logger.error(f"Failed to parse successful (200) response from OpenRouter. Error: {e}. Response Text: {response.text[:500]}...", exc_info=True)
        return "Sorry, there was an issue parsing the response from the AI service."
    except Exception as e:
        logger.error(f"Unexpected error processing OpenRouter success response: {e}", exc_info=True)
        return "Sorry, an unexpected error occurred while processing the AI response."


async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
    """Generates summary using DeepSeek via OpenRouter API.

    Args:
        text: Content to summarise; truncated if it exceeds the app-level cap.
        summary_type: Either 'paragraph' or 'points'.
        api_key: OpenRouter API key.

    Returns:
        The summary on success, or a user-facing message starting with
        "Error:" / "Sorry," on failure — callers check these prefixes, so
        keep them stable.
    """
    logger.info(f"Generating '{summary_type}' summary. Input length: {len(text)}")

    # Guard clauses: without a key or content there is nothing to do.
    if not api_key:
        logger.error("OpenRouter API key missing.")
        return "Error: AI service configuration key is missing."
    if not text or not text.strip():
        logger.warning("generate_summary called with empty or whitespace-only text.")
        return "Error: No content was provided to summarize."

    openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
    # Consider using a non-free model if rate limits are hit or quality needed
    model_name = "deepseek/deepseek-chat:free"

    prompts = _summary_prompts(summary_type)
    if prompts is None:
        logger.error(f"Invalid summary_type '{summary_type}' requested.")
        return f"Error: Invalid summary type ('{summary_type}') requested. Please choose 'paragraph' or 'points'."
    system_message, user_prompt_instruction = prompts

    # Practical limit for API context window / cost control. This is a rough
    # character-based heuristic, not real tokenization.
    MAX_INPUT_TOKENS_ESTIMATE = 28000
    AVG_CHARS_PER_TOKEN = 4
    MAX_INPUT_LENGTH = MAX_INPUT_TOKENS_ESTIMATE * AVG_CHARS_PER_TOKEN

    if len(text) > MAX_INPUT_LENGTH:
        logger.warning(f"Input text length ({len(text)} chars) exceeds estimated limit ({MAX_INPUT_LENGTH}). Truncating.")
        truncation_marker = "\n\n[... Text truncated due to length ...]"
        text = text[:MAX_INPUT_LENGTH - len(truncation_marker)] + truncation_marker

    # Construct the messages payload for the API.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"{user_prompt_instruction}\n\n--- TEXT TO SUMMARIZE ---\n\n{text}\n\n--- END OF TEXT ---"},
    ]

    # Referer and Title identify this app to OpenRouter.
    space_host = os.environ.get("SPACE_HOST", "huggingface.co/spaces/YOUR_SPACE_NAME")
    referer_url = f"https://{space_host}" if space_host and not space_host.startswith("http") else space_host or "https://huggingface.co"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": referer_url,
        "X-Title": "Telegram URL Summarizer Bot",
    }
    payload = json.dumps({"model": model_name, "messages": messages})

    try:
        logger.debug(f"Sending request to OpenRouter (Model: {model_name}). Prompt length approx: {len(text)} chars.")
        # requests is blocking, so run it in a worker thread; generous timeout
        # because AI generation can be slow.
        response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=120)
        logger.debug(f"Received status {response.status_code} from OpenRouter.")

        if response.status_code == 200:
            return _parse_openrouter_success(response, summary_type)

        # Map known OpenRouter error statuses to user-facing messages.
        if response.status_code == 401:
            logger.error("OpenRouter API key is invalid (Unauthorized - 401).")
            return "Error: AI service authentication failed. Please check the configuration."
        if response.status_code == 402:
            logger.error("OpenRouter Payment Required (402). Check credits/limits.")
            return "Sorry, there's an issue with the AI service account limits or payment."
        if response.status_code == 429:
            logger.warning("OpenRouter Rate Limit Hit (429).")
            return "Sorry, the AI model is currently busy due to high demand. Please try again in a moment."
        if response.status_code == 400:
            logger.error(f"OpenRouter Bad Request (400). Likely prompt issue. Response: {response.text[:500]}...")
            return "Sorry, the request to the AI service was invalid (possibly due to the content or prompt)."
        if response.status_code >= 500:
            logger.error(f"OpenRouter Server Error ({response.status_code}). Response: {response.text[:500]}...")
            return "Sorry, the AI service is experiencing internal issues. Please try again later."

        # Anything else: surface the API's own error message if one is present.
        logger.error(f"Unexpected HTTP status {response.status_code} from OpenRouter. Response: {response.text[:500]}...")
        try:
            error_data = response.json()
            error_msg = error_data.get("error", {}).get("message", response.text[:100])
            return f"Sorry, the AI service returned an error ({response.status_code}): {error_msg}"
        except json.JSONDecodeError:
            return f"Sorry, the AI service returned an unexpected error (Status: {response.status_code})."

    except requests.exceptions.Timeout:
        logger.error("Timeout connecting to OpenRouter API.")
        return "Sorry, the request to the AI model timed out. Please try again."
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error connecting to OpenRouter API: {e}")
        return "Sorry, there was a network error connecting to the AI model service."
    except Exception as e:
        logger.error(f"Unexpected error occurred within generate_summary function: {e}", exc_info=True)
        return "Sorry, an unexpected internal error occurred while generating the summary."
|
| 535 |
|
| 536 |
|
| 537 |
# --- Telegram Bot Handlers ---
|
| 538 |
|
| 539 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles the /start command: greets the user and explains basic usage.

    Silently ignores updates that lack an effective user or a message (e.g.
    channel posts), since there is nothing to reply to.
    """
    user = update.effective_user
    if not user: return  # Should not happen with a command
    if not update.message: return  # Defensive: no message to reply to (e.g. channel post)
    logger.info(f"User {user.id} ({user.username or 'NoUsername'}) initiated /start.")
    # Use mention_html for linking username if available, otherwise just first name
    mention = user.mention_html() if user.username else user.first_name
    start_message = (
        f"👋 Hello {mention}!\n\n"
        "I can summarise YouTube videos or web articles for you.\n\n"
        "Just send me a link (URL) and I'll ask you whether you want the summary as a paragraph or bullet points.\n\n"
        "Type /help for more details."
    )
    await update.message.reply_html(start_message)
|
| 553 |
|
| 554 |
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles the /help command: sends usage instructions and caveats."""
    user = update.effective_user
    logger.info(f"User {user.id if user else '?'} requested /help.")
    if not update.message: return  # Defensive: no message to reply to (e.g. channel post)
    help_text = (
        "**How to Use Me:**\n"
        "1. Send me a direct link (URL) to a YouTube video or a web article.\n"
        "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n"
        "3. Click the button for your preferred format.\n"
        "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
        "**Important Notes:**\n"
        "- **YouTube:** Getting transcripts can sometimes fail if they are disabled, unavailable for the video's language, or if YouTube temporarily blocks requests.\n"
        "- **Websites:** I do my best to extract the main article content, but complex websites (especially those heavily reliant on JavaScript or with strong anti-scraping measures) might not work perfectly. I have a fallback service to help with tricky sites.\n"
        "- **AI Summaries:** The AI tries its best to be accurate and follow the requested format, but errors or unexpected outputs are possible.\n"
        "- **Length:** Very long articles or videos might be truncated before summarization to fit within processing limits.\n\n"
        "Just send a link to get started!"
    )
    # Use MarkdownV2 for better formatting control if needed, but MARKDOWN is simpler
    await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
|
| 573 |
|
| 574 |
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles messages containing potential URLs.

    Scans an incoming text message for a link; if one is found, stashes it in
    user_data and offers the summary-format buttons, otherwise nudges the
    user (unless the message was a command).
    """
    message = update.message
    if not message or not message.text:
        return
    text = message.text.strip()
    sender = update.effective_user
    if not sender:
        return  # A plain message normally carries a user; stay defensive.

    # Loose URL matcher: scheme, dotted hostname, optional path/query.
    # Deliberately simple - not full RFC 3986 validation.
    found = re.search(
        r'https?://(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}(?:/[^\s]*)?',
        text,
    )

    if found is None:
        # Not a link: reply with guidance unless it's a command message.
        if not text.startswith('/'):
            await message.reply_text("Please send me a valid URL (starting with http:// or https://) to summarize.")
        return

    link = found.group(0)
    logger.info(f"User {sender.id} sent potential URL: {link}")

    # Remember the link so the button callback handler can retrieve it later.
    context.user_data['url_to_summarize'] = link
    logger.debug(f"Stored URL '{link}' in user_data for user {sender.id}")

    format_buttons = InlineKeyboardMarkup([[
        InlineKeyboardButton("📜 Paragraph Summary", callback_data="paragraph"),
        InlineKeyboardButton("🔹 Bullet Points", callback_data="points"),
    ]])

    # Ask which summary format the user wants; suppress the link preview.
    await message.reply_text(
        f"✅ Link received:\n`{link}`\n\nChoose your desired summary format:",
        reply_markup=format_buttons,
        parse_mode=ParseMode.MARKDOWN,
        link_preview_options={'is_disabled': True},
    )
|
| 615 |
+
|
| 616 |
|
| 617 |
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handles button presses for choosing summary type.

    Flow: acknowledge the callback, recover the URL stashed by
    handle_potential_url, fetch content (YouTube transcript or scraped
    website with a URLToText fallback), generate the AI summary, send the
    result, and finally clean up the interim status message. All Telegram
    calls are individually wrapped so a single timeout cannot abort the
    whole pipeline.
    """
    query = update.callback_query
    if not query or not query.from_user:
        logger.warning("Callback query or user missing in update.")
        return  # Can't proceed without query/user
    user = query.from_user

    # --- Answer Callback Query Immediately ---
    # Telegram expects answer() promptly or the client shows a spinner.
    try:
        await query.answer()  # Acknowledge the button press
        logger.debug(f"Answered callback query {query.id} for user {user.id}")
    except TimedOut:
        # Log timeout but proceed; the button loading indicator might just hang for the user
        logger.warning(f"Timeout answering callback query {query.id} for user {user.id}. Processing continues.")
    except Exception as e:
        # Log other errors but proceed cautiously. The button might remain "loading".
        logger.error(f"Error answering callback query {query.id} for user {user.id}: {e!r}", exc_info=True)

    summary_type = query.data  # 'paragraph' or 'points'
    # Retrieve URL stored earlier for this user
    url = context.user_data.get('url_to_summarize')
    logger.info(f"User {user.id} chose summary type '{summary_type}'. Checking for stored URL.")

    if not url:
        logger.warning(f"User {user.id} pressed button '{summary_type}', but NO URL found in user_data context.")
        try:
            # Inform user context was lost (e.g., bot restarted, long delay)
            await query.edit_message_text(text="⚠️ Oops! I lost the context for that link. Please send the link again.")
        except TimedOut:
            logger.error(f"Timeout trying to edit message to inform user {user.id} about lost context.")
        except Exception as edit_err:
            # Log error if editing fails (message might already be gone, or other Telegram issue)
            logger.error(f"Failed to edit message for lost context for user {user.id}: {edit_err}")
        return  # Stop processing if URL is missing

    # --- URL Found - Proceed with Processing ---
    logger.info(f"Processing URL '{url}' for user {user.id} with type '{summary_type}'.")
    # Clear the URL from context now that we're processing it
    context.user_data.pop('url_to_summarize', None)
    logger.debug(f"Cleared URL from user_data for user {user.id}")

    # Fetch current API keys (allows for potential runtime changes, though unlikely here)
    current_openrouter_key = os.environ.get('OPENROUTER_API_KEY')
    current_urltotext_key = os.environ.get('URLTOTEXT_API_KEY')
    current_supadata_key = os.environ.get('SUPADATA_API_KEY')
    current_apify_token = os.environ.get('APIFY_API_TOKEN')
    # Simple check log
    keys_present = f"OR={'Y' if current_openrouter_key else 'N'}, UTT={'Y' if current_urltotext_key else 'N'}, SD={'Y' if current_supadata_key else 'N'}, AP={'Y' if current_apify_token else 'N'}"
    logger.debug(f"API Key check for user {user.id} request: {keys_present}")

    # Critical dependency check: AI key
    if not current_openrouter_key:
        logger.error(f"CRITICAL: OpenRouter API key is missing. Cannot generate summary for user {user.id}.")
        try:
            await query.edit_message_text(text="❌ Configuration Error: The AI summarization service is not configured correctly. Please contact the administrator.")
        except TimedOut:
            logger.error(f"Timeout editing message to inform user {user.id} about missing AI key.")
        except Exception as edit_err:
            logger.error(f"Failed to edit message for missing AI key for user {user.id}: {edit_err}")
        return

    # --- Inform User Processing Has Started ---
    processing_message_text = f"⏳ Working on your '{summary_type}' summary for the link...\n_(This might take up to a minute depending on the content)_"
    message_to_edit = query.message  # The message with the buttons
    status_message_sent = None  # Will hold msg ID if we send a new status message

    try:
        if message_to_edit:
            await query.edit_message_text(text=processing_message_text)
            logger.debug(f"Edited original message {message_to_edit.message_id} to show 'Working...' status for query {query.id}")
        else:
            # This case should be rare if query.message exists, but handle defensively
            logger.warning(f"Original message (query.message) not found for query {query.id}. Cannot edit, will send new status message.")
            raise ValueError("Original message object missing")  # Force fallback to sending new message
    # NOTE(review): Exception alone already covers TimedOut in this tuple;
    # kept as-is to preserve behavior and the explicit intent.
    except (TimedOut, Exception) as e:
        # If editing fails (e.g., message too old, deleted, rate limit), try sending a new message
        logger.warning(f"Could not edit original message {message_to_edit.message_id if message_to_edit else 'N/A'} for query {query.id}: {e!r}. Attempting to send a new status message.")
        message_to_edit = None  # Ensure we don't try to delete this later if editing failed
        try:
            status_message_sent = await context.bot.send_message(chat_id=user.id, text=processing_message_text)
            logger.debug(f"Sent new status message {status_message_sent.message_id} to user {user.id}.")
        except TimedOut:
            logger.error(f"Timeout sending NEW 'Working...' status message to user {user.id}. Processing continues without feedback.")
            # User won't know bot is working - proceed anyway, hope for the best.
        except Exception as send_err:
            logger.error(f"Failed sending NEW 'Working...' status message to user {user.id}: {send_err}. Processing continues without feedback.")
            # As above.

    # --- Main Content Fetching and Summarization ---
    content = None  # Fetched transcript/article text, or None on failure
    user_feedback_message = None  # Holds error/status messages for the user
    success = False  # Tracks if we successfully sent a summary

    try:
        # Send 'typing' action to indicate activity
        try:
            logger.debug(f"Sending 'typing' chat action to chat {user.id}")
            await context.bot.send_chat_action(chat_id=user.id, action='typing')
        except TimedOut: logger.warning(f"Timeout sending 'typing' action for user {user.id}.")
        except Exception as ca_err: logger.warning(f"Failed sending 'typing' action for user {user.id}: {ca_err}")

        # --- Determine Content Type and Fetch ---
        is_yt = is_youtube_url(url)
        logger.debug(f"URL ({url}) is YouTube: {is_yt} (User: {user.id})")

        if is_yt:
            video_id = extract_youtube_id(url)
            if video_id:
                logger.info(f"Fetching YouTube transcript for video ID: {video_id} (User: {user.id})")
                content = await get_youtube_transcript(video_id, url, current_supadata_key, current_apify_token)
                if not content:
                    logger.warning(f"Failed to get YouTube transcript for {video_id} (User: {user.id}).")
                    user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be unavailable, private, have captions disabled, or an error occurred."
                else:
                    logger.info(f"Successfully fetched YouTube transcript for {video_id}. Length: {len(content)} (User: {user.id})")
            else:
                logger.warning(f"Failed to extract YouTube video ID from URL: {url} (User: {user.id})")
                user_feedback_message = "⚠️ Sorry, I couldn't identify a valid YouTube video ID in the link you provided."
        else:
            # --- Website Scraping ---
            logger.info(f"Attempting website scrape (Requests/BS4) for URL: {url} (User: {user.id})")
            content = await get_website_content_via_requests(url)
            if content:
                logger.info(f"Website scrape successful (Requests/BS4). Length: {len(content)} (User: {user.id})")
                # Content found, no need for feedback message yet
            else:
                logger.warning(f"Primary website scrape failed for {url} (User: {user.id}). Trying fallback API.")
                if current_urltotext_key:
                    # Send typing again if first scrape failed and we try another method
                    try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before fallback scrape.")
                    except: pass  # Ignore if fails

                    logger.info(f"Attempting website scrape via URLToText API for: {url} (User: {user.id})")
                    content = await get_website_content_via_urltotext_api(url, current_urltotext_key)
                    if content:
                        logger.info(f"Website scrape successful via URLToText API. Length: {len(content)} (User: {user.id})")
                    else:
                        logger.warning(f"Fallback website scrape (URLToText API) also failed for {url} (User: {user.id}).")
                        user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website using available methods. It might be protected or structured in a way I can't parse."
                else:
                    # Fallback key missing
                    logger.warning(f"Primary scrape failed and URLToText API key not configured. Cannot fallback for {url} (User: {user.id}).")
                    user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website, and the fallback service isn't configured."

        # --- Generate Summary if Content Was Fetched ---
        if content:
            logger.info(f"Content fetched (Length: {len(content)}). Generating '{summary_type}' summary for user {user.id}.")
            # Send typing before potentially long AI call
            try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before AI summary generation.")
            except: pass

            summary = await generate_summary(content, summary_type, current_openrouter_key)

            # Check if summary generation returned an error message
            # (generate_summary signals failure via "Error:"/"Sorry," prefixes).
            if summary.startswith("Error:") or summary.startswith("Sorry,"):
                logger.warning(f"AI summary generation failed for user {user.id}. Reason: {summary}")
                user_feedback_message = f"⚠️ {summary}"  # Use the error message from generate_summary
            else:
                # --- Summary Success - Send to User ---
                logger.info(f"Summary generated successfully for user {user.id}. Length: {len(summary)}. Sending result.")
                try:
                    await context.bot.send_message(
                        chat_id=user.id,
                        text=summary,
                        parse_mode=ParseMode.MARKDOWN,  # Assuming AI generates markdown for points
                        link_preview_options={'is_disabled': True}
                    )
                    success = True
                    user_feedback_message = None  # Clear any previous fetching error message
                    logger.info(f"Successfully sent summary to user {user.id}.")
                except TimedOut:
                    logger.error(f"Timeout sending final summary message to user {user.id}.")
                    user_feedback_message = "⚠️ Sorry, there was a timeout while trying to send you the final summary."
                    success = False  # Mark as failed if sending timed out
                except Exception as send_final_err:
                    logger.error(f"Failed sending final summary to user {user.id}: {send_final_err}", exc_info=True)
                    user_feedback_message = "⚠️ Sorry, an unexpected error occurred while sending the final summary."
                    success = False  # Mark as failed

        elif not user_feedback_message:
            # If content is None, but no specific error message was set above, set a generic one.
            logger.warning(f"Content retrieval resulted in None, but no specific user feedback message was set. URL: {url} (User: {user.id})")
            user_feedback_message = "⚠️ Sorry, I couldn't retrieve any usable content from the link provided."

        # --- Send Final Feedback Message if Processing Failed ---
        if user_feedback_message and not success:
            logger.warning(f"Processing failed or summary sending failed for user {user.id}. Sending feedback: {user_feedback_message}")
            try:
                await context.bot.send_message(chat_id=user.id, text=user_feedback_message)
            except TimedOut:
                logger.error(f"Timeout sending final FAILURE feedback message to user {user.id}.")
            except Exception as send_feedback_err:
                logger.error(f"Failed sending final FAILURE feedback message to user {user.id}: {send_feedback_err}")

    except Exception as e:
        # Catch-all for unexpected errors during the main processing block
        logger.error(f"Unexpected critical error during callback processing for user {user.id}, URL {url}: {e}", exc_info=True)
        try:
            # Send a generic error message to the user
            await context.bot.send_message(chat_id=user.id, text="❌ Oops! An unexpected internal error occurred while processing your request. The issue has been logged.")
        except TimedOut:
            logger.error(f"Timeout sending CRITICAL internal error feedback message to user {user.id}.")
        except Exception as final_err:
            # If even sending the error message fails, log it.
            logger.error(f"Failed sending CRITICAL internal error feedback message to user {user.id}: {final_err}")
        # Ensure success is False if we hit this block
        success = False

    finally:
        # --- Clean up Status Message(s) ---
        logger.debug(f"Cleaning up status message(s) for user {user.id}, query {query.id}. Success={success}")
        try:
            if status_message_sent:
                # If we sent a separate "Working..." message, delete it regardless of success/failure
                # as the final result or error message has been (or attempted to be) sent.
                await context.bot.delete_message(chat_id=user.id, message_id=status_message_sent.message_id)
                logger.debug(f"Deleted separate status message {status_message_sent.message_id} for user {user.id}.")
            elif message_to_edit:
                # If we edited the original message with the buttons...
                if success:
                    # If processing succeeded, delete the "Working..." message.
                    await query.delete_message()
                    logger.debug(f"Processing succeeded. Deleted original (edited) message {message_to_edit.message_id} for query {query.id}.")
                else:
                    # If processing failed, *don't* delete the message.
                    # It either still shows "Working..." (if sending final error failed)
                    # or it might show an error message if edit_message_text was used for that.
                    # Let's try to edit it one last time to show a generic failure if no specific feedback was sent.
                    # This is complex, maybe just leave it as is for simplicity.
                    logger.debug(f"Processing failed. Leaving edited message {message_to_edit.message_id} in place for query {query.id}.")
                    # Optional: Try one last edit to show failure if needed, but might be overkill
                    # if not user_feedback_message: # Only if no other error was sent
                    #     try: await query.edit_message_text("❌ Processing failed.")
                    #     except: pass # Ignore errors here

            # If message_to_edit was None (original edit failed) and status_message_sent was None (sending new status failed), there's nothing to delete here.

        except TimedOut:
            logger.warning(f"Timeout attempting to delete status/button message for user {user.id}, query {query.id}.")
        except Exception as del_e:
            # Log deletion errors as warnings, not critical if cleanup fails.
            # Common error: message already deleted or trying to delete too late.
            logger.warning(f"Could not delete status/button message for user {user.id}, query {query.id}: {del_e!r}")

    # Log the completion of the callback handling
    logger.info(f"Finished handling callback query {query.id} for user {user.id}. Overall Success: {success}")
|
| 864 |
+
|
| 865 |
|
| 866 |
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Log Errors caused by Updates."""
    # NOTE(review): relies on TimedOut / NetworkError being imported from
    # telegram.error — confirm the top-of-file import includes TimedOut.
    err = context.error
    logger.error(f"Exception while handling an update: {err}", exc_info=err)
    # Downgrade common transient transport failures to warnings.
    if isinstance(err, TimedOut):
        logger.warning("A timeout error occurred in PTB communication.")
        return
    if isinstance(err, NetworkError):
        logger.warning(f"A network error occurred: {err}")
    # Consider notifying admin or user for specific critical errors if appropriate
|
| 875 |
|
| 876 |
+
# --- Bot Setup Function (Modified: Increased Pool/Timeouts) ---
async def setup_bot_config() -> Application:
    """Configure the PTB Application with custom HTTPX timeout/pool settings.

    Returns:
        The configured (not yet initialized) telegram.ext.Application.

    Raises:
        ValueError: If the TELEGRAM_TOKEN environment variable is not set.
    """
    logger.info("Configuring Telegram Application...")
    if not TELEGRAM_TOKEN:
        logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
        raise ValueError("TELEGRAM_TOKEN environment variable not set.")

    # --- HTTPX client settings: increased timeouts and pool size ---
    connect_timeout = 10.0        # Slightly higher connect timeout
    read_timeout = 30.0           # Timeout for reading the response
    write_timeout = 30.0          # Timeout for sending the request
    pool_timeout = 30.0           # Timeout for acquiring a pooled connection
    connection_pool_size = 50     # Significantly increased pool size

    logger.info(f"Creating PTB HTTPXRequest with settings: "
                f"connect_timeout={connect_timeout}, read_timeout={read_timeout}, "
                f"write_timeout={write_timeout}, pool_timeout={pool_timeout}, "
                f"pool_size={connection_pool_size}")

    # FIX: python-telegram-bot's HTTPXRequest does not accept an httpx.Limits
    # object via a `limits=` keyword (that raised TypeError at startup).
    # Pool sizing is configured with the documented `connection_pool_size`
    # parameter, which PTB translates into httpx.Limits internally.
    custom_request = HTTPXRequest(
        connection_pool_size=connection_pool_size,
        connect_timeout=connect_timeout,
        read_timeout=read_timeout,
        write_timeout=write_timeout,
        pool_timeout=pool_timeout,
        http_version="1.1",  # HTTP/1.1 is usually fine end-to-end
    )

    # Build the application with the custom request object.
    # (get_updates_request is not customized: webhook mode does not poll.)
    application_builder = Application.builder().token(TELEGRAM_TOKEN)
    application_builder.request(custom_request)
    application = application_builder.build()

    # --- Register Handlers ---
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    # Handles non-command text messages that might contain a URL
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
    # Handles the button clicks ('paragraph' or 'points')
    application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    # Global error handler
    application.add_error_handler(error_handler)

    logger.info("Telegram application handlers configured.")
    return application
|
| 937 |
|
| 938 |
# --- ASGI Lifespan Context Manager ---
|
|
|
|
| 940 |
async def lifespan(app: Starlette):
    """Handle PTB startup and shutdown during the ASGI lifespan.

    Startup: builds and initializes the PTB Application, starts its
    background tasks, and registers the Telegram webhook (needs SPACE_HOST).
    Shutdown: stops and shuts down the PTB Application gracefully.
    """
    global ptb_app
    logger.info("ASGI Lifespan: Startup sequence initiated...")
    try:
        # --- Setup and Initialize PTB Application ---
        ptb_app = await setup_bot_config()
        logger.info("PTB Application object configured. Initializing...")
        await ptb_app.initialize()  # Initialize application components (e.g., bot instance)
        logger.info("PTB Application initialized. Starting background tasks (e.g., job queue)...")
        # Start PTB's internal tasks but not polling (we use webhook)
        await ptb_app.start()
        # FIX: Updater.stop() is a coroutine and must be awaited; it is also
        # only valid while the updater is running, so guard on .running.
        if ptb_app.updater and ptb_app.updater.running:
            await ptb_app.updater.stop()
        bot_instance = ptb_app.bot
        bot_info = await bot_instance.get_me()
        logger.info(f"PTB Application started successfully. Bot ID: {bot_info.id}, Username: @{bot_info.username}")

        # --- Set Webhook ---
        # SPACE_HOST must be set (e.g. in Hugging Face Space secrets).
        WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
        if WEBHOOK_URL_BASE:
            # Normalize to a proper HTTPS URL
            if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
            webhook_path = "/webhook"  # Must match the Flask route defined later
            full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"

            logger.info(f"Attempting to set Telegram webhook to: {full_webhook_url}")
            # Short delay can sometimes help prevent race conditions on startup
            await asyncio.sleep(2.0)
            try:
                await bot_instance.set_webhook(
                    url=full_webhook_url,
                    allowed_updates=Update.ALL_TYPES,
                    # secret_token=... is recommended for security if possible
                )
                # Verify webhook setup
                webhook_info = await bot_instance.get_webhook_info()
                if webhook_info.url == full_webhook_url:
                    logger.info(f"Telegram webhook set successfully! Current info: {webhook_info}")
                else:
                    logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got: {webhook_info.url}. Info: {webhook_info}")
            except RetryAfter as e:
                # Happens when multiple workers race to set the webhook
                logger.warning(f"Webhook setting throttled by Telegram (RetryAfter: {e.retry_after}s). Another instance likely succeeded or try again later.")
                await asyncio.sleep(e.retry_after or 2)
                webhook_info = await bot_instance.get_webhook_info()
                logger.info(f"Webhook info after RetryAfter delay: {webhook_info}")
            except Exception as e:
                logger.error(f"Failed to set Telegram webhook to {full_webhook_url}: {e}", exc_info=True)
        else:
            logger.warning("SPACE_HOST environment variable not found. Cannot set webhook automatically. Bot will not receive updates via webhook.")

        logger.info("ASGI Lifespan: Startup complete. Application is ready to yield.")
        yield  # --- Application runs here ---

    except Exception as startup_err:
        logger.critical(f"CRITICAL ERROR during ASGI application startup: {startup_err}", exc_info=True)
        # Re-raise so the ASGI server does not start in a broken state
        raise
    finally:
        # --- Shutdown Sequence ---
        logger.info("ASGI Lifespan: Shutdown sequence initiated...")
        if ptb_app:
            bot_username = ptb_app.bot.username if ptb_app.bot else "N/A"
            logger.info(f"PTB App instance found for @{bot_username}. Checking if running...")
            # `_running` is internal; fall back to the public `running` property.
            is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
            if is_running:
                try:
                    logger.info("Stopping PTB Application's background tasks...")
                    await ptb_app.stop()  # Stop internal tasks like JobQueue
                    logger.info("Shutting down PTB Application connections and resources...")
                    await ptb_app.shutdown()  # Clean up resources (e.g., close HTTPX client)
                    logger.info("PTB Application shut down gracefully.")
                except Exception as shutdown_err:
                    logger.error(f"Error during PTB Application shutdown: {shutdown_err}", exc_info=True)
            else:
                logger.warning("PTB Application instance exists but was not marked as running at shutdown.")
                # Attempt shutdown anyway just in case resources need cleaning
                try: await ptb_app.shutdown()
                except Exception: logger.error("Error during shutdown of non-running PTB app.", exc_info=True)
        else:
            logger.warning("No PTB Application instance (ptb_app) found during ASGI shutdown.")
        logger.info("ASGI Lifespan: Shutdown complete.")
|
| 1030 |
|
| 1031 |
|
| 1032 |
+
# --- Flask App Setup (for Webhook Route) ---
# Flask is used only for defining the HTTP routes; it is served inside
# Starlette's ASGI context via WSGIMiddleware.
flask_core_app = Flask(__name__)
logger.info("Core Flask app instance created (used by Starlette for routing).")
|
| 1037 |
|
| 1038 |
+
# --- Define Flask Routes ---
|
| 1039 |
@flask_core_app.route('/')
def index():
    """Basic health check endpoint."""
    logger.debug("Health check endpoint '/' accessed.")
    bot_status = "Unknown / Not Initialized"
    if ptb_app and ptb_app.bot:
        # Re-check the running flag; it may have changed since startup.
        if getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False):
            bot_status = f"Running (@{ptb_app.bot.username})"
        else:
            bot_status = f"Initialized/Stopped (@{ptb_app.bot.username})"
    return f"Telegram Bot Summarizer - Status: {bot_status} - Listening via Starlette/Uvicorn."
|
| 1049 |
|
| 1050 |
@flask_core_app.route('/webhook', methods=['POST'])
async def webhook() -> Response:
    """Webhook endpoint called by Telegram.

    Deserializes the incoming update JSON and dispatches it to the PTB
    application's handlers. Returns 200 so Telegram stops retrying; 503
    when the bot is not ready, 400/500 on bad payloads or handler errors.
    """
    global ptb_app  # Use the global instance initialized by the lifespan hook

    if not ptb_app:
        logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None. Lifespan likely failed.")
        return Response('Bot service is not configured or failed during startup.', status=503)

    # Check internal state (safer than assuming ptb_app implies running)
    is_running = getattr(ptb_app, '_running', False) or getattr(ptb_app, 'running', False)
    if not is_running:
        logger.error("Webhook triggered, but PTB Application is not currently running.")
        return Response('Bot service is initialized but not actively running.', status=503)

    logger.debug("Webhook endpoint received POST request from Telegram.")
    try:
        # FIX: Flask's request.get_json() is synchronous — awaiting its return
        # value (a dict) raised TypeError on every update. silent=True makes
        # malformed JSON come back as None, which the check below turns into
        # a 400 response instead of an unhandled werkzeug BadRequest.
        update_data = request.get_json(silent=True)
        if not update_data:
            logger.warning("Received empty or non-JSON data on webhook.")
            return Response('Bad Request: Expected JSON payload.', status=400)

        # Deserialize JSON into a Telegram Update object
        update = Update.de_json(update_data, ptb_app.bot)
        logger.debug(f"Processing update_id: {update.update_id} via webhook route.")

        # Dispatch to the matching handler (CommandHandler, MessageHandler, ...)
        await ptb_app.process_update(update)

        logger.debug(f"Finished processing update_id: {update.update_id}")
        # Return 200 OK to Telegram to acknowledge receipt
        return Response('ok', status=200)

    except json.JSONDecodeError:
        logger.error("Failed to decode JSON from Telegram webhook request.", exc_info=True)
        return Response('Bad Request: Invalid JSON format.', status=400)
    except Exception as e:
        # Covers Update.de_json and ptb_app.process_update failures.
        logger.error(f"Error processing update in webhook handler: {e}", exc_info=True)
        # Telegram will likely retry sending the update later
        return Response('Internal Server Error processing update.', status=500)
|
| 1097 |
+
|
| 1098 |
+
|
| 1099 |
+
# --- Create Starlette ASGI Application ---
# This is the main application object that Uvicorn/Gunicorn will run.
# The Flask app is mounted at '/', so Starlette forwards both '/' and
# '/webhook' requests to it through the WSGI bridge.
app = Starlette(
    debug=False,            # Keep False in production
    lifespan=lifespan,      # PTB startup/shutdown hook
    routes=[Mount("/", app=WSGIMiddleware(flask_core_app))],
)
logger.info("Starlette ASGI application created, configured with lifespan and Flask app mounted at '/'.")
|
| 1111 |
|
| 1112 |
|
| 1113 |
+
# --- Development Server Execution Block ---
# Running this file directly starts ONLY the Flask development server,
# without the ASGI lifespan — so the Telegram bot never initializes.
# Deploy with `gunicorn main:app` or `uvicorn main:app` instead.
if __name__ == '__main__':
    banner = "=" * 50
    startup_warnings = (
        banner,
        " RUNNING SCRIPT DIRECTLY (using __main__) ".center(50, "="),
        banner,
        "This mode starts the Flask development server.",
        "!!! IT DOES **NOT** RUN THE ASGI LIFESPAN !!!",
        "!!! The Telegram Bot (PTB Application) WILL NOT INITIALIZE OR RUN !!!",
        "This is suitable ONLY for verifying Flask routes locally.",
        "For proper testing/deployment, use: uvicorn main:app --reload --port 8080",
        "or via Gunicorn: gunicorn -c gunicorn.conf.py main:app",
        banner,
    )
    for line in startup_warnings:
        logger.warning(line)

    if not TELEGRAM_TOKEN:
        logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable missing. Aborting direct Flask start.")
    else:
        # Port from environment, defaulting to 8080 for local development.
        local_port = int(os.environ.get('PORT', 8080))
        logger.info(f"Starting Flask development server on http://0.0.0.0:{local_port}")
        # use_reloader=False avoids double-start surprises while debugging.
        flask_core_app.run(host='0.0.0.0', port=local_port, debug=True, use_reloader=False)
|