Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -52,6 +52,15 @@ except ImportError:
|
|
52 |
_gemini_available = False
|
53 |
# logger will be defined later, log warning after logger setup
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
# --- Logging Setup ---
|
57 |
logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
|
@@ -63,9 +72,14 @@ logging.getLogger('gunicorn.error').setLevel(logging.INFO)
|
|
63 |
logging.getLogger('uvicorn').setLevel(logging.INFO)
|
64 |
logging.getLogger('starlette').setLevel(logging.INFO)
|
65 |
if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
|
|
|
|
|
|
|
|
|
66 |
logger = logging.getLogger(__name__)
|
67 |
logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
|
68 |
if not _gemini_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.")
|
|
|
69 |
|
70 |
|
71 |
# --- Global variable for PTB app ---
|
@@ -81,16 +95,16 @@ def get_secret(secret_name):
|
|
81 |
|
82 |
TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
|
83 |
OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Now Fallback
|
84 |
-
URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
|
85 |
-
SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
|
86 |
-
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
87 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
88 |
-
GEMINI_API_KEY = get_secret('GEMINI_API_KEY') #
|
89 |
|
90 |
# Models (User can still configure via env vars)
|
91 |
-
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
|
92 |
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
|
93 |
-
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary Model
|
94 |
|
95 |
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
96 |
if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.")
|
@@ -101,19 +115,29 @@ if not _gemini_available: logger.warning("⚠️ WARNING: google-generativeai li
|
|
101 |
elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found or empty. Gemini disabled.")
|
102 |
|
103 |
_openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
|
104 |
-
if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback disabled.")
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
-
if not URLTOTEXT_API_KEY: pass
|
108 |
-
if not SUPADATA_API_KEY: pass
|
109 |
-
if not APIFY_API_TOKEN: pass
|
110 |
if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
|
111 |
|
112 |
logger.info("Secret loading and configuration check finished.")
|
113 |
-
logger.info(f"
|
114 |
-
logger.info(f"
|
|
|
|
|
|
|
115 |
logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
|
116 |
-
_apify_token_exists = bool(APIFY_API_TOKEN)
|
117 |
|
118 |
|
119 |
if _gemini_primary_enabled:
|
@@ -143,26 +167,8 @@ def extract_youtube_id(url):
|
|
143 |
|
144 |
|
145 |
# --- Content Fetching Functions ---
|
146 |
-
# (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
|
147 |
-
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
|
148 |
-
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
|
149 |
-
try:
|
150 |
-
async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
|
151 |
-
logger.debug(f"[Web Scrape] Sending request to {url}")
|
152 |
-
response = await client.get(url)
|
153 |
-
logger.debug(f"[Web Scrape] Received response {response.status_code} from {url}")
|
154 |
-
response.raise_for_status()
|
155 |
-
content_type = response.headers.get('content-type', '').lower()
|
156 |
-
if 'html' not in content_type: logger.warning(f"[Web Scrape] Non-HTML content type from {url}: {content_type}"); return None
|
157 |
-
try: return response.text
|
158 |
-
except Exception as e: logger.error(f"[Web Scrape] Error decoding response for {url}: {e}"); return None
|
159 |
-
except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape] HTTP error {e.response.status_code} fetching {url}: {e}")
|
160 |
-
except httpx.TimeoutException: logger.error(f"[Web Scrape] Timeout error fetching {url}")
|
161 |
-
except httpx.TooManyRedirects: logger.error(f"[Web Scrape] Too many redirects fetching {url}")
|
162 |
-
except httpx.RequestError as e: logger.error(f"[Web Scrape] Request error fetching {url}: {e}")
|
163 |
-
except Exception as e: logger.error(f"[Web Scrape] Unexpected error fetching {url}: {e}", exc_info=True)
|
164 |
-
return None
|
165 |
|
|
|
166 |
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
|
167 |
if not video_id: logger.error("[Supadata] No video_id provided"); return None
|
168 |
if not api_key: logger.error("[Supadata] API key missing."); return None
|
@@ -278,55 +284,128 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
|
|
278 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
279 |
return transcript_text
|
280 |
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
try:
|
287 |
def parse_html(content):
|
288 |
soup = BeautifulSoup(content, DEFAULT_PARSER)
|
|
|
289 |
for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure"]): element.extract()
|
290 |
main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
|
291 |
target_element = main_content if main_content else soup.body
|
292 |
-
if not target_element: logger.warning(f"[
|
293 |
lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
|
294 |
text = " ".join(lines)
|
295 |
-
if not text: logger.warning(f"[
|
296 |
return text
|
|
|
297 |
text_content = await asyncio.to_thread(parse_html, html_content)
|
298 |
-
if text_content: logger.info(f"[
|
299 |
-
else:
|
300 |
-
|
|
|
|
|
301 |
|
|
|
302 |
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
303 |
-
|
304 |
-
|
305 |
-
|
|
|
|
|
|
|
|
|
|
|
306 |
api_endpoint = "https://urltotext.com/api/v1/urltotext/"
|
307 |
payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
|
308 |
headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" }
|
309 |
try:
|
310 |
async with httpx.AsyncClient(timeout=45.0) as client:
|
311 |
-
logger.debug(f"[
|
312 |
response = await client.post(api_endpoint, headers=headers, json=payload)
|
313 |
-
logger.debug(f"[
|
314 |
if response.status_code == 200:
|
315 |
try:
|
316 |
data = response.json()
|
317 |
content = data.get("data", {}).get("content"); credits = data.get("credits_used", "N/A"); warning = data.get("data", {}).get("warning")
|
318 |
-
if warning: logger.warning(f"[
|
319 |
-
if content: logger.info(f"[
|
320 |
-
else: logger.warning(f"[
|
321 |
-
except json.JSONDecodeError: logger.error(f"[
|
322 |
-
except Exception as e: logger.error(f"[
|
323 |
-
elif response.status_code in [400, 401, 402, 403, 422, 500]: logger.error(f"[
|
324 |
-
else: logger.error(f"[
|
325 |
-
except httpx.TimeoutException: logger.error(f"[
|
326 |
-
except httpx.RequestError as e: logger.error(f"[
|
327 |
-
except Exception as e: logger.error(f"[
|
328 |
-
|
329 |
-
# --- Summarization Functions ---
|
330 |
|
331 |
async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
|
332 |
"""Internal function to call Gemini API. Returns (summary, error_message)."""
|
@@ -336,7 +415,7 @@ async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Opt
|
|
336 |
return None, "Error: Primary AI service (Gemini) not configured/available."
|
337 |
logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
|
338 |
|
339 |
-
# Define prompts
|
340 |
if summary_type == "paragraph":
|
341 |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
|
342 |
"• Clear and simple language suitable for someone unfamiliar with the topic.\n"
|
@@ -412,9 +491,6 @@ async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Opt
|
|
412 |
|
413 |
if summary:
|
414 |
logger.info(f"[Gemini Primary] Success generating summary. Output len: {len(summary)}");
|
415 |
-
# Escape Markdown for Telegram only if necessary (Removed escaping as it might conflict with plain heading)
|
416 |
-
# escaped_summary = summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
|
417 |
-
# Let's see if the AI respects the instruction without further escaping. If not, we might need selective escaping.
|
418 |
return summary.strip(), None
|
419 |
else:
|
420 |
finish_reason = response.candidates[0].finish_reason if response.candidates else 'N/A'
|
@@ -433,7 +509,7 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str],
|
|
433 |
return None, "Error: Fallback AI service (OpenRouter) not configured/available."
|
434 |
logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
435 |
|
436 |
-
# Define prompts
|
437 |
if summary_type == "paragraph":
|
438 |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
|
439 |
"• Clear and simple language suitable for someone unfamiliar with the topic.\n"
|
@@ -493,8 +569,6 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str],
|
|
493 |
summary = message.get("content")
|
494 |
if summary:
|
495 |
logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Output len: {len(summary)}")
|
496 |
-
# Escape Markdown for Telegram only if necessary (Removed escaping)
|
497 |
-
# escaped_summary = summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
|
498 |
return summary.strip(), None
|
499 |
else:
|
500 |
logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Resp: {data}")
|
@@ -585,14 +659,21 @@ async def generate_summary(text: str, summary_type: str) -> str:
|
|
585 |
return "Sorry, an unknown error occurred during summary generation after trying all available models."
|
586 |
|
587 |
|
588 |
-
#
|
589 |
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
|
590 |
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
591 |
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
|
592 |
try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
|
593 |
except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
|
594 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
595 |
try:
|
|
|
596 |
processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
|
597 |
if status_message_id:
|
598 |
try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ); logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
|
@@ -603,67 +684,132 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
|
|
603 |
if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
|
604 |
else: raise RuntimeError("Failed to send status message after retries.")
|
605 |
except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise
|
|
|
606 |
try:
|
607 |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
608 |
-
is_youtube = is_youtube_url(url)
|
|
|
|
|
|
|
609 |
if is_youtube:
|
610 |
video_id = extract_youtube_id(url)
|
611 |
-
if video_id:
|
612 |
-
|
613 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
614 |
else:
|
615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
616 |
if not content:
|
617 |
-
|
618 |
-
|
619 |
-
if
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
|
|
624 |
if content:
|
625 |
logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
|
626 |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
627 |
-
|
628 |
-
|
|
|
|
|
|
|
|
|
|
|
629 |
else:
|
630 |
max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
|
631 |
-
#
|
632 |
-
#
|
633 |
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
|
634 |
-
for part in summary_parts[1:]:
|
635 |
-
|
636 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
637 |
except Exception as e:
|
638 |
-
logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
|
|
|
639 |
try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
|
640 |
except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
|
|
|
641 |
except Exception as outer_e:
|
642 |
logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
|
643 |
try:
|
644 |
-
if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred." )
|
645 |
except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
|
|
|
646 |
finally:
|
|
|
647 |
delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
|
648 |
if delete_target_id and bot:
|
649 |
try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id); logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
|
650 |
except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
|
|
|
|
|
651 |
if background_request and hasattr(background_request, '_client') and background_request._client:
|
652 |
try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
|
653 |
except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
|
654 |
logger.info(f"[Task {task_id}] Task completed. Success: {success}")
|
655 |
|
|
|
|
|
656 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
657 |
user = update.effective_user; mention = user.mention_html()
|
658 |
if not user or not update.message: return
|
659 |
logger.info(f"User {user.id} used /start.")
|
660 |
-
await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" )
|
661 |
|
662 |
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
663 |
user = update.effective_user
|
664 |
if not user or not update.message: return
|
665 |
logger.info(f"User {user.id} used /help.")
|
666 |
-
help_text = ( "🔍 How to use this bot:\n\n" "1. Send me any YouTube video link or website URL.\n" "2. I'll ask you how you want it summarised (paragraph or points).\n"
|
667 |
"3. Click the button for your choice.\n" "4. Wait for the summary!\n\n" "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n" "Commands:\n" "`/start` - Display welcome message\n" "`/help` - Show this help message" )
|
668 |
await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
|
669 |
|
@@ -671,67 +817,111 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYP
|
|
671 |
if not update.message or not update.message.text: return
|
672 |
url = update.message.text.strip(); user = update.effective_user
|
673 |
if not user: return
|
674 |
-
|
|
|
|
|
|
|
|
|
|
|
675 |
logger.info(f"User {user.id} sent potential URL: {url}")
|
676 |
-
context.user_data['url_to_summarize'] = url
|
|
|
677 |
keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
|
678 |
reply_markup = InlineKeyboardMarkup(keyboard)
|
679 |
-
await update.message.reply_text( f"Okay, I see this link:\n{url}\n\nHow would you like it summarised?", reply_markup=reply_markup, disable_web_page_preview=True )
|
680 |
|
681 |
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
682 |
query = update.callback_query
|
683 |
if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
|
684 |
user = query.from_user; summary_type = query.data; query_id = query.id
|
685 |
try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
|
686 |
-
except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True)
|
687 |
-
|
|
|
|
|
688 |
logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
|
|
|
689 |
if not url:
|
690 |
-
logger.warning(f"No URL in context for user {user.id} (cb {query_id}).")
|
691 |
try: await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
|
692 |
except Exception as e:
|
693 |
-
logger.error(f"Failed edit 'URL not found'
|
694 |
-
|
695 |
-
|
|
|
696 |
return
|
697 |
|
698 |
-
|
|
|
|
|
|
|
699 |
|
700 |
-
|
|
|
701 |
if not TELEGRAM_TOKEN:
|
702 |
-
logger.critical("
|
703 |
-
try: await query.edit_message_text(text="❌ Bot
|
704 |
-
except Exception: pass
|
705 |
return
|
706 |
if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
|
707 |
-
logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid!")
|
708 |
-
try: await query.edit_message_text(text="❌ AI
|
709 |
-
except Exception: pass
|
710 |
return
|
711 |
elif not _gemini_primary_enabled:
|
712 |
-
logger.warning("Primary AI (Gemini) is unavailable, will rely on fallback.")
|
713 |
-
#
|
714 |
elif not _openrouter_fallback_enabled:
|
715 |
-
logger.warning("Fallback AI (OpenRouter) is unavailable.")
|
716 |
-
#
|
717 |
|
|
|
718 |
logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
|
719 |
-
asyncio.create_task(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
720 |
|
721 |
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
|
722 |
-
|
723 |
-
|
|
|
|
|
|
|
|
|
724 |
logger.error("Exception while handling an update:", exc_info=context.error)
|
|
|
|
|
|
|
|
|
725 |
|
|
|
726 |
async def setup_bot_config() -> Application:
|
727 |
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
|
728 |
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
|
|
|
729 |
custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
|
730 |
-
application = Application.builder().token(TELEGRAM_TOKEN).request(custom_request).build()
|
731 |
-
|
732 |
-
application.add_handler(
|
733 |
-
application.
|
734 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
735 |
@contextlib.asynccontextmanager
|
736 |
async def lifespan(app: Starlette):
|
737 |
global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
|
@@ -739,75 +929,156 @@ async def lifespan(app: Starlette):
|
|
739 |
if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
|
740 |
try:
|
741 |
ptb_app = await setup_bot_config(); await ptb_app.initialize(); bot_info = await ptb_app.bot.get_me(); logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
|
|
|
742 |
current_webhook_info = await ptb_app.bot.get_webhook_info()
|
743 |
if current_webhook_info and current_webhook_info.url:
|
744 |
logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
|
745 |
try:
|
746 |
if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
|
747 |
else: logger.warning("Failed delete webhook (API returned False).")
|
748 |
-
except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
|
749 |
-
|
|
|
|
|
|
|
750 |
if space_host:
|
751 |
-
protocol = "https"
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
765 |
except Exception as startup_err:
|
766 |
logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
|
|
|
767 |
if ptb_app:
|
768 |
if ptb_app.running: await ptb_app.stop()
|
769 |
await ptb_app.shutdown()
|
770 |
-
raise
|
771 |
-
|
|
|
772 |
logger.info("ASGI Lifespan: Shutdown initiated...")
|
773 |
if ptb_app:
|
774 |
-
if ptb_app.running:
|
775 |
-
|
776 |
-
|
|
|
|
|
|
|
|
|
|
|
777 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
778 |
|
779 |
async def health_check(request: Request) -> PlainTextResponse:
|
780 |
-
global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled
|
781 |
bot_status = "Not Initialized"
|
782 |
if ptb_app and ptb_app.bot:
|
783 |
try:
|
784 |
-
if ptb_app.running:
|
|
|
|
|
785 |
else: bot_status = "Initialized/Not running"
|
786 |
except Exception as e: bot_status = f"Error checking status: {e}"
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
791 |
|
792 |
async def telegram_webhook(request: Request) -> Response:
|
793 |
global WEBHOOK_SECRET
|
794 |
-
if not ptb_app:
|
795 |
-
|
|
|
|
|
|
|
|
|
|
|
796 |
try:
|
|
|
797 |
if WEBHOOK_SECRET:
|
798 |
token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
|
799 |
-
if token_header != WEBHOOK_SECRET:
|
800 |
-
|
801 |
-
|
802 |
-
|
803 |
-
|
804 |
-
|
805 |
-
|
806 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
807 |
|
|
|
808 |
if __name__ == '__main__':
|
809 |
import uvicorn
|
810 |
-
logger.warning("Running in development mode using Uvicorn directly")
|
811 |
log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
|
|
|
812 |
local_port = int(os.environ.get('PORT', 8080))
|
|
|
|
|
|
|
|
|
813 |
uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)
|
|
|
1 |
+
# main.py (Modified to add crawl4ai and adjust fetching logic)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
52 |
_gemini_available = False
|
53 |
# logger will be defined later, log warning after logger setup
|
54 |
|
55 |
+
# --- Crawl4AI (New Primary Web Scraper) ---
|
56 |
+
try:
|
57 |
+
from crawl4ai import AsyncWebCrawler
|
58 |
+
_crawl4ai_available = True
|
59 |
+
except ImportError:
|
60 |
+
AsyncWebCrawler = None
|
61 |
+
_crawl4ai_available = False
|
62 |
+
# logger will be defined later
|
63 |
+
|
64 |
|
65 |
# --- Logging Setup ---
|
66 |
logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
|
|
|
72 |
logging.getLogger('uvicorn').setLevel(logging.INFO)
|
73 |
logging.getLogger('starlette').setLevel(logging.INFO)
|
74 |
if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
|
75 |
+
# Reduce crawl4ai logging noise if needed
|
76 |
+
if _crawl4ai_available:
|
77 |
+
logging.getLogger("crawl4ai").setLevel(logging.WARNING) # Or INFO for more detail
|
78 |
+
logging.getLogger("playwright").setLevel(logging.WARNING)
|
79 |
logger = logging.getLogger(__name__)
|
80 |
logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
|
81 |
if not _gemini_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.")
|
82 |
+
if not _crawl4ai_available: logger.warning("crawl4ai library not found. Primary website scraping will be disabled.")
|
83 |
|
84 |
|
85 |
# --- Global variable for PTB app ---
|
|
|
95 |
|
96 |
TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
|
97 |
OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Now Fallback
|
98 |
+
URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # Fallback Web 2
|
99 |
+
SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY') # Fallback YT 1
|
100 |
+
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # Fallback YT 2
|
101 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
102 |
+
GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Primary Summarizer
|
103 |
|
104 |
# Models (User can still configure via env vars)
|
105 |
+
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Summarizer Model
|
106 |
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
|
107 |
+
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary Summarizer Model
|
108 |
|
109 |
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
110 |
if not GEMINI_API_KEY: logger.error("❌ ERROR: GEMINI_API_KEY not found. Primary summarization (Gemini) will fail.")
|
|
|
115 |
elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found or empty. Gemini disabled.")
|
116 |
|
117 |
_openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
|
118 |
+
if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Fallback summarization disabled.")
|
119 |
|
120 |
+
_crawl4ai_primary_web_enabled = _crawl4ai_available
|
121 |
+
if not _crawl4ai_primary_web_enabled: logger.warning("⚠️ WARNING: crawl4ai library not found. Primary web scraping disabled.")
|
122 |
+
|
123 |
+
_bs4_fallback_web_enabled = True # Assumes bs4 is always available
|
124 |
+
_urltotext_fallback_web_enabled = bool(URLTOTEXT_API_KEY)
|
125 |
+
if not _urltotext_fallback_web_enabled: logger.info("ℹ️ INFO: URLTOTEXT_API_KEY not found. Secondary web fallback disabled.")
|
126 |
+
|
127 |
+
# Fallback YT checks
|
128 |
+
if not SUPADATA_API_KEY: logger.info("ℹ️ INFO: SUPADATA_API_KEY not found. First YT fallback disabled.")
|
129 |
+
_apify_token_exists = bool(APIFY_API_TOKEN)
|
130 |
+
if not _apify_token_exists: logger.info("ℹ️ INFO: APIFY_API_TOKEN not found. Second YT fallback disabled.")
|
131 |
|
|
|
|
|
|
|
132 |
if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
|
133 |
|
134 |
logger.info("Secret loading and configuration check finished.")
|
135 |
+
logger.info(f"Primary Web Scraper: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'DISABLED'}")
|
136 |
+
logger.info(f"Fallback Web Scraper 1: {'BeautifulSoup' if _bs4_fallback_web_enabled else 'DISABLED'}")
|
137 |
+
logger.info(f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_web_enabled else 'DISABLED'}")
|
138 |
+
logger.info(f"Using Gemini Model (Primary Summarizer): {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}")
|
139 |
+
logger.info(f"Using OpenRouter Model (Fallback Summarizer): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}")
|
140 |
logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
|
|
|
141 |
|
142 |
|
143 |
if _gemini_primary_enabled:
|
|
|
167 |
|
168 |
|
169 |
# --- Content Fetching Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
+
# --- YouTube Fetching (Unchanged) ---
|
172 |
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
|
173 |
if not video_id: logger.error("[Supadata] No video_id provided"); return None
|
174 |
if not api_key: logger.error("[Supadata] API key missing."); return None
|
|
|
284 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
285 |
return transcript_text
|
286 |
|
287 |
+
# --- Website Content Fetching ---
|
288 |
+
|
289 |
+
# NEW: Primary Method using Crawl4AI
|
290 |
+
async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
|
291 |
+
"""Fetches website content using Crawl4AI. Returns Markdown."""
|
292 |
+
global _crawl4ai_primary_web_enabled
|
293 |
+
if not _crawl4ai_primary_web_enabled:
|
294 |
+
logger.warning("[Crawl4AI Primary] Called but disabled/unavailable.")
|
295 |
+
return None
|
296 |
+
if not url: logger.error("[Crawl4AI Primary] No URL provided"); return None
|
297 |
+
|
298 |
+
logger.info(f"[Crawl4AI Primary] Attempting fetch for: {url}")
|
299 |
+
try:
|
300 |
+
# Using async with for proper resource cleanup
|
301 |
+
async with AsyncWebCrawler(headless=True) as crawler: # Headless is generally preferred for server environments
|
302 |
+
# Timeout can be added here if needed: crawler_params={"timeout": 60000} # milliseconds
|
303 |
+
result = await crawler.arun(url=url)
|
304 |
+
if result and result.markdown:
|
305 |
+
logger.info(f"[Crawl4AI Primary] Success via Crawl4AI for {url}. Markdown length: {len(result.markdown)}")
|
306 |
+
return result.markdown.strip()
|
307 |
+
elif result and not result.markdown:
|
308 |
+
logger.warning(f"[Crawl4AI Primary] Crawl4AI ran but returned empty markdown for {url}.")
|
309 |
+
return None
|
310 |
+
else:
|
311 |
+
logger.warning(f"[Crawl4AI Primary] Crawl4AI returned no result object for {url}.")
|
312 |
+
return None
|
313 |
+
except ImportError:
|
314 |
+
logger.error("[Crawl4AI Primary] Import Error - library might be missing.")
|
315 |
+
_crawl4ai_primary_web_enabled = False # Disable if import fails at runtime
|
316 |
+
return None
|
317 |
+
except Exception as e:
|
318 |
+
# Catch potential Playwright errors, timeouts, or other issues
|
319 |
+
logger.error(f"[Crawl4AI Primary] Error during Crawl4AI execution for {url}: {e}", exc_info=True)
|
320 |
+
return None
|
321 |
+
|
322 |
+
# HELPER: Used by Fallback 1 (BS4) - no changes needed here
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
    """Download raw HTML for *url*.

    Returns the decoded response body, or None on any HTTP error, timeout,
    redirect loop, decode failure, or non-HTML content type.
    """
    # Browser-like headers reduce the chance of trivial bot blocking.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
            logger.debug(f"[Web Scrape Helper] Sending request to {url}")
            response = await client.get(url)
            logger.debug(f"[Web Scrape Helper] Received response {response.status_code} from {url}")
            response.raise_for_status()
            content_type = response.headers.get('content-type', '').lower()
            if 'html' not in content_type:
                logger.warning(f"[Web Scrape Helper] Non-HTML content type from {url}: {content_type}")
                return None
            try:
                return response.text
            except Exception as e:
                logger.error(f"[Web Scrape Helper] Error decoding response for {url}: {e}")
                return None
    except httpx.HTTPStatusError as e:
        logger.error(f"[Web Scrape Helper] HTTP error {e.response.status_code} fetching {url}: {e}")
    except httpx.TimeoutException:
        logger.error(f"[Web Scrape Helper] Timeout error fetching {url}")
    except httpx.TooManyRedirects:
        logger.error(f"[Web Scrape Helper] Too many redirects fetching {url}")
    except httpx.RequestError as e:
        logger.error(f"[Web Scrape Helper] Request error fetching {url}: {e}")
    except Exception as e:
        logger.error(f"[Web Scrape Helper] Unexpected error fetching {url}: {e}", exc_info=True)
    return None
|
341 |
+
|
342 |
+
# Fallback 1: Direct BS4 Scraping (Renamed original function)
async def get_website_content_via_bs4(url: str) -> Optional[str]:
    """Fetches and extracts text content using BeautifulSoup (Fallback 1).

    Downloads the page via fetch_url_content_for_scrape, then parses it off
    the event loop. Returns a single space-joined text string, or None.
    """
    global _bs4_fallback_web_enabled
    if not _bs4_fallback_web_enabled:
        logger.warning("[BS4 Fallback] Called but disabled.")  # Should not happen unless manually disabled
        return None
    if not url:
        logger.error("[BS4 Fallback] No URL provided")
        return None
    logger.info(f"[BS4 Fallback] Fetching website content for: {url}")
    html_content = await fetch_url_content_for_scrape(url)  # Use the helper
    if not html_content:
        logger.warning(f"[BS4 Fallback] fetch_url_content_for_scrape failed for {url}")
        return None
    try:
        def parse_html(content):
            # Strip non-content elements, pick the most likely "main" region,
            # then collapse the remaining text into one string.
            soup = BeautifulSoup(content, DEFAULT_PARSER)
            noise_tags = ["script", "style", "header", "footer", "nav", "aside", "form",
                          "button", "input", "iframe", "img", "svg", "link", "meta",
                          "noscript", "figure"]
            for element in soup(noise_tags):
                element.extract()
            main_content = (soup.find('main') or soup.find('article') or soup.find(id='content')
                            or soup.find(class_='content') or soup.find(id='main-content')
                            or soup.find(class_='main-content') or soup.find(role='main'))
            target_element = main_content if main_content else soup.body
            if not target_element:
                logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}")
                return None
            lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
            text = " ".join(lines)
            if not text:
                logger.warning(f"[BS4 Fallback] Extracted text empty after clean for {url}")
                return None
            return text

        # Parsing is CPU-bound; keep it off the event loop.
        text_content = await asyncio.to_thread(parse_html, html_content)
        if not text_content:
            logger.warning(f"[BS4 Fallback] parse_html returned None for {url}")
            return None
        logger.info(f"[BS4 Fallback] Success scrape for {url} (final len: {len(text_content)})")
        return text_content
    except Exception as e:
        logger.error(f"[BS4 Fallback] Error scraping/parsing {url}: {e}", exc_info=True)
        return None
|
374 |
|
375 |
+
# Fallback 2: urltotext.com API (Unchanged function)
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
    """Fetches website content using urltotext.com API (Fallback 2).

    Posts the URL to the hosted extraction service and returns the stripped
    text content, or None on any error / empty response.
    """
    global _urltotext_fallback_web_enabled
    if not _urltotext_fallback_web_enabled:
        logger.warning("[urltotext API Fallback] Called but disabled (no API key).")
        return None
    if not url:
        logger.error("[urltotext API Fallback] No URL")
        return None
    if not api_key:
        logger.error("[urltotext API Fallback] urltotext.com API key missing.")  # Redundant check but safe
        return None
    logger.info(f"[urltotext API Fallback] Attempting fetch for: {url} using urltotext.com API")
    api_endpoint = "https://urltotext.com/api/v1/urltotext/"
    payload = { "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False }
    headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" }
    try:
        async with httpx.AsyncClient(timeout=45.0) as client:
            logger.debug(f"[urltotext API Fallback] Sending request to urltotext.com API for {url}")
            response = await client.post(api_endpoint, headers=headers, json=payload)
            logger.debug(f"[urltotext API Fallback] Received status {response.status_code} from urltotext.com API for {url}")
            if response.status_code == 200:
                try:
                    data = response.json()
                    content = data.get("data", {}).get("content")
                    credits = data.get("credits_used", "N/A")
                    warning = data.get("data", {}).get("warning")
                    if warning:
                        logger.warning(f"[urltotext API Fallback] urltotext.com API Warning for {url}: {warning}")
                    if content:
                        logger.info(f"[urltotext API Fallback] Success via urltotext.com API for {url}. Len: {len(content)}. Credits: {credits}")
                        return content.strip()
                    logger.warning(f"[urltotext API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[urltotext API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}")
                    return None
                except Exception as e:
                    logger.error(f"[urltotext API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True)
                    return None
            elif response.status_code in [400, 401, 402, 403, 422, 500]:
                # Documented error codes from the service.
                logger.error(f"[urltotext API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
                return None
            else:
                logger.error(f"[urltotext API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}")
                return None
    except httpx.TimeoutException:
        logger.error(f"[urltotext API Fallback] Timeout connecting to urltotext.com API for {url}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[urltotext API Fallback] Request error connecting to urltotext.com API for {url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[urltotext API Fallback] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True)
        return None
|
407 |
+
|
408 |
+
# --- Summarization Functions (Unchanged) ---
|
409 |
|
410 |
async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
|
411 |
"""Internal function to call Gemini API. Returns (summary, error_message)."""
|
|
|
415 |
return None, "Error: Primary AI service (Gemini) not configured/available."
|
416 |
logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
|
417 |
|
418 |
+
# Define prompts (Keep existing prompts)
|
419 |
if summary_type == "paragraph":
|
420 |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
|
421 |
"• Clear and simple language suitable for someone unfamiliar with the topic.\n"
|
|
|
491 |
|
492 |
if summary:
|
493 |
logger.info(f"[Gemini Primary] Success generating summary. Output len: {len(summary)}");
|
|
|
|
|
|
|
494 |
return summary.strip(), None
|
495 |
else:
|
496 |
finish_reason = response.candidates[0].finish_reason if response.candidates else 'N/A'
|
|
|
509 |
return None, "Error: Fallback AI service (OpenRouter) not configured/available."
|
510 |
logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
511 |
|
512 |
+
# Define prompts (Keep existing prompts)
|
513 |
if summary_type == "paragraph":
|
514 |
prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
|
515 |
"• Clear and simple language suitable for someone unfamiliar with the topic.\n"
|
|
|
569 |
summary = message.get("content")
|
570 |
if summary:
|
571 |
logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Output len: {len(summary)}")
|
|
|
|
|
572 |
return summary.strip(), None
|
573 |
else:
|
574 |
logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Resp: {data}")
|
|
|
659 |
return "Sorry, an unknown error occurred during summary generation after trying all available models."
|
660 |
|
661 |
|
662 |
+
# --- Main Task Processing (Modified Web Fetching Logic) ---
|
663 |
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
|
664 |
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
665 |
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
|
666 |
try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
|
667 |
except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
|
668 |
+
|
669 |
+
content: Optional[str] = None
|
670 |
+
user_feedback_message: Optional[str] = None
|
671 |
+
success = False
|
672 |
+
status_message_id = message_id_to_edit
|
673 |
+
message_to_delete_later_id : Optional[int] = None
|
674 |
+
|
675 |
try:
|
676 |
+
# Send initial "Processing..." message (or edit existing)
|
677 |
processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
|
678 |
if status_message_id:
|
679 |
try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.MARKDOWN, reply_markup=None ); logger.debug(f"[Task {task_id}] Edited message {status_message_id} to 'Processing'")
|
|
|
684 |
if status_message: message_to_delete_later_id = status_message.message_id; logger.debug(f"[Task {task_id}] Sent new status message {message_to_delete_later_id}")
|
685 |
else: raise RuntimeError("Failed to send status message after retries.")
|
686 |
except Exception as e: logger.error(f"[Task {task_id}] CRITICAL: Failed to send new status message: {e}"); raise
|
687 |
+
|
688 |
try:
|
689 |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
690 |
+
is_youtube = is_youtube_url(url)
|
691 |
+
logger.debug(f"[Task {task_id}] URL type: {'YouTube' if is_youtube else 'Website'}")
|
692 |
+
|
693 |
+
# --- YouTube Processing (Unchanged) ---
|
694 |
if is_youtube:
|
695 |
video_id = extract_youtube_id(url)
|
696 |
+
if video_id:
|
697 |
+
content = await get_youtube_transcript(video_id, url) # Tries lib -> Supadata -> Apify
|
698 |
+
else:
|
699 |
+
user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
|
700 |
+
|
701 |
+
if not content and not user_feedback_message:
|
702 |
+
user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
|
703 |
+
|
704 |
+
# --- Website Processing (NEW Logic) ---
|
705 |
else:
|
706 |
+
# Method 1: Crawl4AI (Primary)
|
707 |
+
if _crawl4ai_primary_web_enabled:
|
708 |
+
logger.info(f"[Task {task_id}] Trying primary web method: Crawl4AI")
|
709 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
710 |
+
content = await get_website_content_via_crawl4ai(url)
|
711 |
+
if content:
|
712 |
+
logger.info(f"[Task {task_id}] Success via Crawl4AI for {url} (len: {len(content)})")
|
713 |
+
else:
|
714 |
+
logger.warning(f"[Task {task_id}] Crawl4AI failed or returned empty for {url}.")
|
715 |
+
else:
|
716 |
+
logger.warning(f"[Task {task_id}] Crawl4AI is disabled. Skipping.")
|
717 |
+
|
718 |
+
# Method 2: BeautifulSoup (Fallback 1)
|
719 |
+
if not content and _bs4_fallback_web_enabled:
|
720 |
+
logger.warning(f"[Task {task_id}] Trying fallback web method 1: BeautifulSoup")
|
721 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
722 |
+
content = await get_website_content_via_bs4(url) # Use the renamed function
|
723 |
+
if content:
|
724 |
+
logger.info(f"[Task {task_id}] Success via BS4 scrape for {url} (len: {len(content)})")
|
725 |
+
else:
|
726 |
+
logger.warning(f"[Task {task_id}] BS4 scrape failed or returned empty for {url}.")
|
727 |
+
|
728 |
+
# Method 3: urltotext.com API (Fallback 2)
|
729 |
+
if not content and _urltotext_fallback_web_enabled:
|
730 |
+
logger.warning(f"[Task {task_id}] Trying fallback web method 2: urltotext.com API")
|
731 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
732 |
+
content = await get_website_content_via_api(url, URLTOTEXT_API_KEY) # API key checked inside function
|
733 |
+
if content:
|
734 |
+
logger.info(f"[Task {task_id}] Success via urltotext.com API for {url} (len: {len(content)})")
|
735 |
+
else:
|
736 |
+
logger.warning(f"[Task {task_id}] urltotext.com API failed or returned empty for {url}.")
|
737 |
+
|
738 |
+
# Final check if any website method succeeded
|
739 |
if not content:
|
740 |
+
methods_tried = []
|
741 |
+
if _crawl4ai_primary_web_enabled: methods_tried.append("Crawl4AI")
|
742 |
+
if _bs4_fallback_web_enabled: methods_tried.append("BS4")
|
743 |
+
if _urltotext_fallback_web_enabled: methods_tried.append("API")
|
744 |
+
tried_str = ", ".join(methods_tried) if methods_tried else "configured methods"
|
745 |
+
user_feedback_message = f"Sorry, I couldn't fetch content from that website using any available method ({tried_str}). It might be blocked, inaccessible, or empty."
|
746 |
+
|
747 |
+
# --- Summarization ---
|
748 |
if content:
|
749 |
logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
|
750 |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
751 |
+
# NOTE: `content` might be Markdown (from Crawl4AI) or plain text (from others).
|
752 |
+
# The LLM prompts should handle this reasonably well.
|
753 |
+
final_summary = await generate_summary(content, summary_type) # Calls Gemini -> OpenRouter
|
754 |
+
|
755 |
+
if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
|
756 |
+
user_feedback_message = final_summary # Pass AI error message to user
|
757 |
+
logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
|
758 |
else:
|
759 |
max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
|
760 |
+
# Send summary without explicit Markdown parsing, assuming LLM followed instructions
|
761 |
+
# for plain headings and standard bullet points. Using parse_mode=None.
|
762 |
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
|
763 |
+
for part in summary_parts[1:]:
|
764 |
+
await asyncio.sleep(0.5)
|
765 |
+
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} )
|
766 |
+
success = True
|
767 |
+
logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
|
768 |
+
user_feedback_message = None # Clear any previous fetch error if summary succeeded
|
769 |
+
|
770 |
+
# --- Send Feedback if Fetching Failed ---
|
771 |
+
elif user_feedback_message:
|
772 |
+
logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}")
|
773 |
+
await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
|
774 |
+
|
775 |
except Exception as e:
|
776 |
+
logger.error(f"[Task {task_id}] Unexpected error during processing: {e}", exc_info=True)
|
777 |
+
user_feedback_message = "Oops! Something went really wrong while processing your request. Please try again later."
|
778 |
try: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
|
779 |
except Exception: logger.error(f"[Task {task_id}] Failed to send unexpected error feedback.")
|
780 |
+
|
781 |
except Exception as outer_e:
|
782 |
logger.critical(f"[Task {task_id}] Critical outer error: {outer_e}", exc_info=True)
|
783 |
try:
|
784 |
+
if bot: await retry_bot_operation( bot.send_message, chat_id=chat_id, text="❌ Critical internal error occurred processing your request." )
|
785 |
except Exception: logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
|
786 |
+
|
787 |
finally:
|
788 |
+
# Delete the "Processing..." or button message
|
789 |
delete_target_id = message_to_delete_later_id if message_to_delete_later_id else status_message_id
|
790 |
if delete_target_id and bot:
|
791 |
try: await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=delete_target_id); logger.debug(f"[Task {task_id}] Deleted status/button message {delete_target_id}")
|
792 |
except Exception as del_e: logger.warning(f"[Task {task_id}] Failed to delete status/button message {delete_target_id}: {del_e}")
|
793 |
+
|
794 |
+
# Close the background bot's HTTP client
|
795 |
if background_request and hasattr(background_request, '_client') and background_request._client:
|
796 |
try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
|
797 |
except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
|
798 |
logger.info(f"[Task {task_id}] Task completed. Success: {success}")
|
799 |
|
800 |
+
|
801 |
+
# --- Bot Handlers (Unchanged) ---
|
802 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /start: greet the user with a short usage hint.

    Guards against updates that have no effective user or message (e.g.
    channel posts) before touching user attributes.
    """
    user = update.effective_user
    # Bail out BEFORE dereferencing `user`: the previous version called
    # user.mention_html() first, raising AttributeError when user was None.
    if not user or not update.message: return
    mention = user.mention_html()
    logger.info(f"User {user.id} used /start.")
    await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" )
|
807 |
|
808 |
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /help: send usage instructions and the command list."""
    user = update.effective_user
    if not user or not update.message:
        return
    logger.info(f"User {user.id} used /help.")
    help_text = ( "🔍 How to use this bot:\n\n"
                  "1. Send me any YouTube video link or website URL.\n"
                  "2. I'll ask you how you want it summarised (paragraph or points).\n"
                  "3. Click the button for your choice.\n"
                  "4. Wait for the summary!\n\n"
                  "I'll try multiple methods to get content if the first one fails (especially for YouTube transcripts).\n\n"
                  "Commands:\n"
                  "`/start` - Display welcome message\n"
                  "`/help` - Show this help message" )
    await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
|
815 |
|
|
|
817 |
if not update.message or not update.message.text: return
|
818 |
url = update.message.text.strip(); user = update.effective_user
|
819 |
if not user: return
|
820 |
+
# Basic URL validation
|
821 |
+
if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]:
|
822 |
+
logger.debug(f"Ignoring non-URL message from {user.id}")
|
823 |
+
# Optional: Reply if it looks like they tried to send something else?
|
824 |
+
# await update.message.reply_text("Please send a valid website URL (starting with http:// or https://) or a YouTube link.")
|
825 |
+
return
|
826 |
logger.info(f"User {user.id} sent potential URL: {url}")
|
827 |
+
context.user_data['url_to_summarize'] = url
|
828 |
+
context.user_data['original_message_id'] = update.message.message_id # Store original message ID if needed later
|
829 |
keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
|
830 |
reply_markup = InlineKeyboardMarkup(keyboard)
|
831 |
+
await update.message.reply_text( f"Okay, I see this link:\n{url}\n\nHow would you like it summarised?", reply_markup=reply_markup, disable_web_page_preview=True )
|
832 |
|
833 |
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle the 'paragraph'/'points' button press and schedule the summary task.

    Flow: ack the callback, read the URL previously stored in user_data,
    clear that context, verify bot/AI configuration, then spawn
    process_summary_task as a fire-and-forget asyncio task. The button
    message id is handed to the task so it can edit/delete the status
    message itself; this handler does not edit the message on success.
    """
    query = update.callback_query
    if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
    user = query.from_user; summary_type = query.data; query_id = query.id
    # Acknowledge promptly so Telegram stops showing the loading spinner.
    try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
    except Exception as e: logger.error(f"Error answering callback {query_id}: {e}", exc_info=True) # Log but continue

    url = context.user_data.get('url_to_summarize')
    message_id_to_edit = query.message.message_id # The message with the buttons
    logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")

    if not url:
        # Context may have expired (e.g. bot restart) or the button is stale.
        logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Might be an old button.")
        try: await query.edit_message_text(text="Sorry, I couldn't find the URL associated with this request. Please send the link again.")
        except Exception as e:
            logger.error(f"Failed to edit 'URL not found' message: {e}")
            # Attempt to send a new message as a fallback
            try: await context.bot.send_message(chat_id=user.id, text="Sorry, the context for your previous request seems to have expired. Please send the link again.")
            except Exception as send_e: logger.error(f"Failed even to send new message about lost context: {send_e}")
        return

    # Clear context *after* checking it exists
    context.user_data.pop('url_to_summarize', None)
    context.user_data.pop('original_message_id', None) # Clear original ID too
    logger.debug(f"Cleared URL context for user {user.id}")

    # Check critical configurations before scheduling task
    global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
    if not TELEGRAM_TOKEN:
        logger.critical("TELEGRAM_TOKEN missing! Cannot schedule task.")
        try: await query.edit_message_text(text="❌ Bot configuration error (Token). Task cannot be started.")
        except Exception: pass # Ignore if edit fails
        return
    if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
        # Neither summariser is usable: refuse rather than start a doomed task.
        logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid! Cannot schedule task.")
        try: await query.edit_message_text(text="❌ AI configuration error: No summarization models available. Task cannot be started.")
        except Exception: pass # Ignore if edit fails
        return
    elif not _gemini_primary_enabled:
        logger.warning("Primary AI (Gemini) is unavailable, will rely on fallback for summarization.")
        # User will be notified by generate_summary if fallback also fails
    elif not _openrouter_fallback_enabled:
        logger.warning("Fallback AI (OpenRouter) is unavailable for summarization.")
        # User will be notified by generate_summary if primary fails

    # Schedule the background task
    logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
    asyncio.create_task(
        process_summary_task(
            user_id=user.id,
            chat_id=query.message.chat_id,
            message_id_to_edit=message_id_to_edit, # Pass the button message ID to edit/delete
            url=url,
            summary_type=summary_type,
            bot_token=TELEGRAM_TOKEN
        ),
        name=f"SummaryTask-{user.id}-{message_id_to_edit}"
    )
    # Don't edit the message here; the task will handle it immediately.
|
892 |
|
893 |
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Global PTB error handler: log exceptions raised while handling updates.

    Known-noisy errors (currently AttributeError carrying the standard
    "object has no attribute" wording) are logged at debug level and dropped.
    """
    err = context.error
    is_known_noise = isinstance(err, (AttributeError,)) and "object has no attribute" in str(err)
    if is_known_noise:
        logger.debug(f"Ignoring known/handled error in error_handler: {err}")
        return
    logger.error("Exception while handling an update:", exc_info=err)
    # Optionally, try to inform the user if it's a direct message context
    # if isinstance(update, Update) and update.effective_chat:
    #     try: await context.bot.send_message(chat_id=update.effective_chat.id, text="An internal error occurred.")
    #     except Exception: logger.error("Failed to send error message to user.")
|
905 |
|
906 |
+
# --- Application Setup (Unchanged) ---
async def setup_bot_config() -> Application:
    """Build the PTB Application: custom HTTP timeouts plus all handlers.

    Raises ValueError if TELEGRAM_TOKEN is missing.
    """
    global TELEGRAM_TOKEN
    logger.info("Configuring Telegram Application...")
    if not TELEGRAM_TOKEN:
        raise ValueError("TELEGRAM_TOKEN missing.")
    # Configure HTTPX client for PTB
    custom_request = HTTPXRequest(
        connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0
    )
    application = (
        Application.builder()
        .token(TELEGRAM_TOKEN)
        .request(custom_request)
        .build()
    )
    # Command handlers
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    # Message handler for potential URLs (non-command text)
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
    # Callback handler for summary type buttons
    application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    # Error handler
    application.add_error_handler(error_handler)
    logger.info("Telegram application handlers configured.")
    return application
|
923 |
+
|
924 |
+
# --- ASGI Lifespan & Webhook (Unchanged) ---
|
925 |
@contextlib.asynccontextmanager
|
926 |
async def lifespan(app: Starlette):
|
927 |
global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
|
|
|
929 |
if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
|
930 |
try:
|
931 |
ptb_app = await setup_bot_config(); await ptb_app.initialize(); bot_info = await ptb_app.bot.get_me(); logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")
|
932 |
+
# Webhook setup logic
|
933 |
current_webhook_info = await ptb_app.bot.get_webhook_info()
|
934 |
if current_webhook_info and current_webhook_info.url:
|
935 |
logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
|
936 |
try:
|
937 |
if await ptb_app.bot.delete_webhook(drop_pending_updates=True): logger.info("Webhook deleted.")
|
938 |
else: logger.warning("Failed delete webhook (API returned False).")
|
939 |
+
except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1) # Brief pause before setting new one
|
940 |
+
|
941 |
+
space_host = os.environ.get("SPACE_HOST") # Provided by Hugging Face Spaces
|
942 |
+
webhook_path = "/webhook" # Matches the Starlette route below
|
943 |
+
full_webhook_url = None
|
944 |
if space_host:
|
945 |
+
protocol = "https"
|
946 |
+
host = space_host.split('://')[-1] # Remove potential protocol prefix from env var
|
947 |
+
full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
|
948 |
+
|
949 |
+
if full_webhook_url:
|
950 |
+
logger.info(f"Setting webhook: {full_webhook_url}")
|
951 |
+
set_webhook_args: Dict[str, Any] = {
|
952 |
+
"url": full_webhook_url,
|
953 |
+
"allowed_updates": Update.ALL_TYPES,
|
954 |
+
"drop_pending_updates": True
|
955 |
+
}
|
956 |
+
if WEBHOOK_SECRET:
|
957 |
+
set_webhook_args["secret_token"] = WEBHOOK_SECRET
|
958 |
+
logger.info("Using webhook secret token.")
|
959 |
+
|
960 |
+
await asyncio.sleep(1.0) # Short delay before setting webhook
|
961 |
+
|
962 |
+
try:
|
963 |
+
await ptb_app.bot.set_webhook(**set_webhook_args)
|
964 |
+
webhook_info = await ptb_app.bot.get_webhook_info() # Verify
|
965 |
+
if webhook_info.url == full_webhook_url:
|
966 |
+
logger.info(f"Webhook set successfully: URL='{webhook_info.url}', Secret Configured={bool(WEBHOOK_SECRET)}")
|
967 |
+
else:
|
968 |
+
logger.error(f"Webhook URL mismatch after setting! Expected '{full_webhook_url}', Got '{webhook_info.url}'. Check SPACE_HOST env var.")
|
969 |
+
raise RuntimeError("Webhook URL mismatch after setting.")
|
970 |
+
|
971 |
+
await ptb_app.start() # Start listening for updates via webhook
|
972 |
+
logger.info("PTB Application started (webhook mode).")
|
973 |
+
|
974 |
+
except Exception as e:
|
975 |
+
logger.critical(f"FATAL: Failed to set webhook: {e}", exc_info=True)
|
976 |
+
raise RuntimeError(f"Failed to set webhook: {e}") from e
|
977 |
+
else:
|
978 |
+
logger.critical("Could not construct webhook URL. SPACE_HOST environment variable might be missing or invalid.")
|
979 |
+
raise RuntimeError("Webhook URL could not be determined.")
|
980 |
+
|
981 |
+
logger.info("ASGI Lifespan: Startup complete."); yield # Application runs here
|
982 |
+
|
983 |
except Exception as startup_err:
|
984 |
logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
|
985 |
+
# Ensure cleanup even if startup fails midway
|
986 |
if ptb_app:
|
987 |
if ptb_app.running: await ptb_app.stop()
|
988 |
await ptb_app.shutdown()
|
989 |
+
raise # Reraise the exception to stop the ASGI server
|
990 |
+
|
991 |
+
finally: # Shutdown phase
|
992 |
logger.info("ASGI Lifespan: Shutdown initiated...")
|
993 |
if ptb_app:
|
994 |
+
if ptb_app.running:
|
995 |
+
logger.info("Stopping PTB application...")
|
996 |
+
await ptb_app.stop()
|
997 |
+
logger.info("Shutting down PTB application...")
|
998 |
+
await ptb_app.shutdown()
|
999 |
+
logger.info("PTB Application shut down.")
|
1000 |
+
else:
|
1001 |
+
logger.info("PTB application was not initialized or failed during startup.")
|
1002 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
1003 |
|
1004 |
async def health_check(request: Request) -> PlainTextResponse:
    """Plain-text status page: bot liveness plus which backends are enabled.

    Reports, per pipeline stage, the primary implementation and its fallbacks
    (web scraping, summarisation, YouTube transcripts) so a glance at `/`
    shows the effective configuration.
    """
    global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _bs4_fallback_web_enabled, _urltotext_fallback_web_enabled

    bot_status = "Not Initialized"
    if ptb_app and ptb_app.bot:
        try:
            if not ptb_app.running:
                bot_status = "Initialized/Not running"
            else:
                # get_me() doubles as a connectivity check against Telegram.
                bot_info = await ptb_app.bot.get_me()
                bot_status = f"Running (@{bot_info.username})"
        except Exception as e:
            bot_status = f"Error checking status: {e}"

    # One line per backend; DISABLED when the key/library check failed at startup.
    web_status = "\n".join([
        f"Web Primary: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'DISABLED'}",
        f"Web Fallback 1: {'BS4' if _bs4_fallback_web_enabled else 'DISABLED'}",
        f"Web Fallback 2: {'API' if _urltotext_fallback_web_enabled else 'DISABLED'}",
    ])
    summary_status = "\n".join([
        f"Summarizer Primary: {GEMINI_MODEL if _gemini_primary_enabled else 'DISABLED'}",
        f"Summarizer Fallback: {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}",
    ])
    yt_status = "\n".join([
        f"YT Fallback 1: {'Supadata' if SUPADATA_API_KEY else 'DISABLED'}",
        f"YT Fallback 2: {APIFY_ACTOR_ID if _apify_token_exists else 'DISABLED'}",
    ])

    body = (
        f"TG Bot Summariser - Status: {bot_status}\n\n"
        f"{web_status}\n\n"
        f"{summary_status}\n\n"
        f"{yt_status}"
    )
    return PlainTextResponse(body)
|
1028 |
|
1029 |
async def telegram_webhook(request: Request) -> Response:
    """Entry point for Telegram webhook POSTs: validate, decode, dispatch to PTB.

    Returns 503 while the bot is unavailable, 403 on a bad secret token,
    400 on malformed JSON, and 200 otherwise — including after processing
    errors, so Telegram does not endlessly retry a poison-pill update.
    """
    global WEBHOOK_SECRET

    # Guard clauses: refuse updates until PTB is fully up.
    if not ptb_app:
        logger.error("Webhook received but PTB application is not initialized.")
        return PlainTextResponse('Bot not initialized', status_code=503)  # Service Unavailable
    if not ptb_app.running:
        logger.warning("Webhook received but PTB application is not running.")
        return PlainTextResponse('Bot not running', status_code=503)  # Service Unavailable

    try:
        # Validate secret token if configured
        if WEBHOOK_SECRET:
            token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
            if token_header != WEBHOOK_SECRET:
                logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'")
                return Response(content="Invalid secret token", status_code=403)  # Forbidden

        # Decode and hand the update to PTB's dispatcher.
        update = Update.de_json(data=await request.json(), bot=ptb_app.bot)
        logger.debug(f"Processing update_id: {update.update_id} via webhook")
        await ptb_app.process_update(update)
        return Response(status_code=200)  # Acknowledge receipt to Telegram

    except json.JSONDecodeError:
        logger.error("Webhook received invalid JSON payload.")
        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
    except Exception as e:
        logger.error(f"Error processing webhook update: {e}", exc_info=True)
        # Deliberately ACK so Telegram doesn't retry; the failure is logged above.
        return Response(status_code=200)
|
1061 |
+
|
1062 |
+
# --- ASGI App Definition (Unchanged) ---
# Two routes only: a GET health/status page and the Telegram webhook receiver.
_routes = [
    Route("/", endpoint=health_check, methods=["GET"]),
    Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
]
app = Starlette(
    debug=False,  # Keep debug False in production
    lifespan=lifespan,
    routes=_routes,
)
logger.info("Starlette ASGI application created with health check and webhook routes.")
|
1072 |
|
1073 |
+
# --- Direct Run (for local testing, unchanged) ---
|
1074 |
if __name__ == '__main__':
|
1075 |
import uvicorn
|
1076 |
+
logger.warning("Running in development mode using Uvicorn directly (not for production)")
|
1077 |
log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
|
1078 |
+
# Use PORT from env var (like HF Spaces provides) or default to 8080 for local dev
|
1079 |
local_port = int(os.environ.get('PORT', 8080))
|
1080 |
+
# Disable webhook setup for local Uvicorn run if needed (manual polling instead)
|
1081 |
+
# You might need to comment out the webhook setting logic in lifespan for local testing
|
1082 |
+
# or run with ngrok/similar and set SPACE_HOST manually.
|
1083 |
+
# For simplicity, assuming webhook setup will just log errors if SPACE_HOST isn't set locally.
|
1084 |
uvicorn.run("__main__:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True)
|