Spaces: Running
Update main.py
main.py CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (Revised with
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -7,13 +7,14 @@ import json
|
|
7 |
import html
|
8 |
import contextlib
|
9 |
import traceback
|
10 |
-
from typing import Optional
|
11 |
|
12 |
# --- Frameworks ---
|
13 |
-
|
14 |
from starlette.applications import Starlette
|
15 |
-
from starlette.routing import Mount
|
16 |
-
from starlette.
|
|
|
17 |
|
18 |
# --- Telegram Bot ---
|
19 |
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot
|
@@ -26,26 +27,28 @@ from telegram.ext import (
|
|
26 |
CallbackQueryHandler,
|
27 |
)
|
28 |
from telegram.constants import ParseMode
|
29 |
-
from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest
|
30 |
from telegram.request import HTTPXRequest
|
31 |
|
32 |
# --- Other Libraries ---
|
33 |
import httpx
|
34 |
-
from youtube_transcript_api import YouTubeTranscriptApi
|
35 |
import requests
|
36 |
from bs4 import BeautifulSoup
|
37 |
-
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
38 |
|
39 |
_apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
|
40 |
if _apify_token_exists:
|
41 |
from apify_client import ApifyClient
|
|
|
42 |
else:
|
43 |
-
ApifyClient = None
|
|
|
44 |
|
45 |
# --- Logging Setup ---
|
46 |
logging.basicConfig(
|
47 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
48 |
-
level=logging.DEBUG
|
49 |
)
|
50 |
logging.getLogger("httpx").setLevel(logging.WARNING)
|
51 |
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
|
@@ -77,36 +80,60 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
|
77 |
logger.info("Secret loading attempt finished.")
|
78 |
|
79 |
# --- Retry Decorator for Bot Operations ---
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
|
98 |
-
# --- Helper Functions
|
99 |
def is_youtube_url(url):
|
100 |
"""Checks if the URL is a valid YouTube video or shorts URL."""
|
101 |
-
|
102 |
-
|
103 |
logger.debug(f"is_youtube_url check for '{url}': {'Match found' if match else 'No match'}")
|
104 |
return bool(match)
|
105 |
|
106 |
def extract_youtube_id(url):
|
107 |
"""Extracts the YouTube video ID from a URL."""
|
108 |
-
|
109 |
-
|
110 |
if match:
|
111 |
video_id = match.group(1)
|
112 |
logger.debug(f"Extracted YouTube ID '{video_id}' from URL: {url}")
|
@@ -115,66 +142,327 @@ def extract_youtube_id(url):
|
|
115 |
logger.warning(f"Could not extract YouTube ID from URL: {url}")
|
116 |
return None
|
117 |
|
118 |
-
# --- Content Fetching Functions
|
119 |
-
# [Keep all your existing content fetching functions exactly as they were]
|
120 |
-
# get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript,
|
121 |
-
# get_website_content_via_requests, get_website_content_via_urltotext_api, generate_summary
|
122 |
|
123 |
-
#
|
124 |
async def process_summary_task(
|
125 |
user_id: int,
|
126 |
chat_id: int,
|
127 |
-
message_id_to_edit: int,
|
128 |
url: str,
|
129 |
summary_type: str,
|
130 |
-
bot_token: str
|
131 |
) -> None:
|
132 |
"""Handles the actual fetching and summarization in a background task."""
|
133 |
-
task_id = f"{user_id}-{message_id_to_edit}"
|
134 |
logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
135 |
-
|
136 |
# Create a new bot instance for this task
|
137 |
-
|
138 |
-
|
139 |
try:
|
140 |
-
# --- Inform User Processing Has Started ---
|
141 |
-
processing_message_text = f"⏳ Working on your '{summary_type}' summary for
|
142 |
-
|
143 |
-
|
144 |
-
@retry_bot_operation
|
145 |
-
async def edit_or_send_status():
|
146 |
-
nonlocal status_message_sent_id, message_id_to_edit
|
147 |
try:
|
148 |
-
await
|
149 |
-
|
150 |
-
message_id=message_id_to_edit,
|
151 |
-
text=processing_message_text
|
152 |
-
)
|
153 |
-
logger.debug(f"[Task {task_id}] Successfully edited message {message_id_to_edit}")
|
154 |
-
except (TimedOut, NetworkError, BadRequest) as e:
|
155 |
-
logger.warning(f"[Task {task_id}] Could not edit original message: {e}. Sending new status message.")
|
156 |
-
message_id_to_edit = None
|
157 |
-
status_message = await bot.send_message(
|
158 |
chat_id=chat_id,
|
159 |
-
160 |
)
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
|
166 |
# --- Main Content Fetching and Summarization ---
|
167 |
-
content = None
|
168 |
-
user_feedback_message = None
|
169 |
-
success = False
|
170 |
-
|
171 |
try:
|
172 |
-
|
173 |
-
@retry_bot_operation
|
174 |
-
async def send_typing():
|
175 |
-
await bot.send_chat_action(chat_id=chat_id, action='typing')
|
176 |
-
|
177 |
-
await send_typing()
|
178 |
|
179 |
# --- Determine Content Type and Fetch ---
|
180 |
is_yt = is_youtube_url(url)
|
@@ -184,98 +472,122 @@ async def process_summary_task(
|
|
184 |
video_id = extract_youtube_id(url)
|
185 |
if video_id:
|
186 |
logger.info(f"[Task {task_id}] Fetching YouTube transcript for {video_id}")
|
187 |
-
content = await get_youtube_transcript(
|
188 |
-
video_id,
|
189 |
-
url,
|
190 |
-
SUPADATA_API_KEY,
|
191 |
-
APIFY_API_TOKEN
|
192 |
-
)
|
193 |
if not content:
|
194 |
-
user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video."
|
195 |
else:
|
196 |
logger.info(f"[Task {task_id}] Attempting website scrape for: {url}")
|
197 |
content = await get_website_content_via_requests(url)
|
198 |
if not content and URLTOTEXT_API_KEY:
|
199 |
-
|
|
|
200 |
content = await get_website_content_via_urltotext_api(url, URLTOTEXT_API_KEY)
|
201 |
-
|
202 |
-
|
|
|
203 |
|
204 |
# --- Generate Summary if Content Was Fetched ---
|
205 |
if content:
|
206 |
-
logger.info(f"[Task {task_id}] Generating '{summary_type}' summary")
|
207 |
-
await
|
208 |
-
|
209 |
-
summary = await generate_summary(content, summary_type, OPENROUTER_API_KEY)
|
210 |
|
211 |
-
|
212 |
-
|
|
|
|
|
213 |
else:
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
chat_id=chat_id,
|
218 |
-
text=summary,
|
219 |
-
parse_mode=ParseMode.MARKDOWN,
|
220 |
-
link_preview_options={'is_disabled': True}
|
221 |
-
)
|
222 |
-
|
223 |
-
await send_summary()
|
224 |
-
success = True
|
225 |
|
226 |
except Exception as e:
|
227 |
-
logger.error(f"[Task {task_id}] Error during
|
228 |
user_feedback_message = "❌ An unexpected error occurred while processing your request."
|
229 |
|
230 |
-
# --- Send Final
|
231 |
-
if
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
|
238 |
except Exception as e:
|
239 |
-
logger.
|
240 |
try:
|
241 |
-
await
|
|
|
242 |
chat_id=chat_id,
|
243 |
-
text="❌ A critical error occurred. Please
|
244 |
)
|
245 |
except Exception:
|
246 |
-
|
247 |
finally:
|
248 |
-
# --- Clean up Status Message
|
249 |
-
|
250 |
-
|
251 |
-
await bot.delete_message
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
await bot.edit_message_text(
|
257 |
-
chat_id=chat_id,
|
258 |
-
message_id=message_id_to_edit,
|
259 |
-
text=final_error_text[:4090]
|
260 |
-
)
|
261 |
-
except Exception as e:
|
262 |
-
logger.warning(f"[Task {task_id}] Cleanup error: {e}")
|
263 |
-
|
264 |
# Ensure bot session is closed
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
|
|
|
|
269 |
|
270 |
logger.info(f"[Task {task_id}] Task completed. Success: {success}")
|
271 |
|
|
|
272 |
# --- Telegram Bot Handlers ---
|
273 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
274 |
"""Handles the /start command."""
|
275 |
user = update.effective_user
|
276 |
-
if not user: return
|
277 |
logger.info(f"User {user.id} initiated /start.")
|
278 |
-
mention = user.mention_html()
|
279 |
start_message = (
|
280 |
f"👋 Hello {mention}!\n\n"
|
281 |
"I can summarise YouTube videos or web articles for you.\n\n"
|
@@ -287,7 +599,8 @@ async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
287 |
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
288 |
"""Handles the /help command."""
|
289 |
user = update.effective_user
|
290 |
-
|
|
|
291 |
help_text = (
|
292 |
"**How to Use Me:**\n"
|
293 |
"1. Send me a direct link (URL) to a YouTube video or a web article.\n"
|
@@ -296,10 +609,12 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
296 |
"4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
|
297 |
"**Important Notes:**\n"
|
298 |
"- **YouTube:** Getting transcripts can sometimes fail if they are disabled or unavailable.\n"
|
299 |
-
"- **Websites:**
|
300 |
-
"- **AI Summaries:** The AI
|
|
|
301 |
"Just send a link to get started!"
|
302 |
)
|
|
|
303 |
await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
|
304 |
|
305 |
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
@@ -309,17 +624,23 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
309 |
user = update.effective_user
|
310 |
if not user: return
|
311 |
|
312 |
-
|
|
|
313 |
match = re.search(url_pattern, message_text)
|
314 |
|
315 |
if match:
|
316 |
url = match.group(0)
|
317 |
-
|
318 |
context.user_data['url_to_summarize'] = url
|
|
|
319 |
|
320 |
keyboard = [
|
321 |
[
|
322 |
-
InlineKeyboardButton("📜 Paragraph
|
323 |
InlineKeyboardButton("🔹 Bullet Points", callback_data="points")
|
324 |
]
|
325 |
]
|
@@ -331,55 +652,89 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
331 |
link_preview_options={'is_disabled': True}
|
332 |
)
|
333 |
elif not message_text.startswith('/'):
|
334 |
-
|
335 |
|
336 |
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
337 |
"""Handles button presses for summary type selection."""
|
338 |
query = update.callback_query
|
339 |
-
if not query or not query.
|
340 |
-
|
341 |
-
|
342 |
-
except:
|
343 |
-
pass
|
344 |
return
|
345 |
|
346 |
-
await query.answer() # Acknowledge the button press immediately
|
347 |
-
|
348 |
user = query.from_user
|
349 |
summary_type = query.data
|
350 |
-
url = context.user_data.get('url_to_summarize')
|
351 |
query_id = query.id
|
352 |
|
353 |
-
|
354 |
|
355 |
if not url:
|
356 |
-
logger.warning(f"No URL found for user {user.id}")
|
357 |
try:
|
358 |
-
await query.edit_message_text(text="⚠️ Oops! I
|
359 |
except Exception as e:
|
360 |
-
logger.error(f"Failed to edit message: {e}")
|
361 |
return
|
362 |
|
363 |
-
# Clear
|
364 |
context.user_data.pop('url_to_summarize', None)
|
|
|
365 |
|
366 |
-
|
367 |
asyncio.create_task(
|
368 |
process_summary_task(
|
369 |
user_id=user.id,
|
370 |
chat_id=query.message.chat_id,
|
371 |
-
message_id_to_edit=
|
372 |
url=url,
|
373 |
summary_type=summary_type,
|
374 |
-
bot_token=TELEGRAM_TOKEN
|
375 |
),
|
376 |
-
name=f"SummaryTask-{user.id}-{
|
377 |
)
|
378 |
|
|
|
379 |
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
|
380 |
"""Log Errors caused by Updates or background tasks."""
|
381 |
-
|
382 |
-
|
383 |
|
384 |
# --- Bot Setup Function ---
|
385 |
async def setup_bot_config() -> Application:
|
@@ -388,124 +743,211 @@ async def setup_bot_config() -> Application:
|
|
388 |
if not TELEGRAM_TOKEN:
|
389 |
raise ValueError("TELEGRAM_TOKEN environment variable not set.")
|
390 |
|
|
|
391 |
custom_request = HTTPXRequest(
|
392 |
connect_timeout=10.0,
|
393 |
-
read_timeout=30.0,
|
394 |
write_timeout=30.0,
|
395 |
-
pool_timeout=
|
396 |
-
http_version="1.1"
|
397 |
)
|
398 |
|
399 |
application = (
|
400 |
Application.builder()
|
401 |
.token(TELEGRAM_TOKEN)
|
402 |
.request(custom_request)
|
|
|
|
|
403 |
.build()
|
404 |
)
|
405 |
|
|
|
406 |
application.add_handler(CommandHandler("start", start))
|
407 |
application.add_handler(CommandHandler("help", help_command))
|
|
|
|
|
408 |
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
|
409 |
application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
|
|
|
|
|
410 |
application.add_error_handler(error_handler)
|
411 |
|
412 |
logger.info("Telegram application handlers configured.")
|
413 |
return application
|
414 |
|
415 |
-
# --- ASGI Lifespan Context Manager ---
|
416 |
@contextlib.asynccontextmanager
|
417 |
async def lifespan(app: Starlette):
|
418 |
"""Handles PTB startup and shutdown during ASGI lifespan."""
|
419 |
global ptb_app
|
420 |
logger.info("ASGI Lifespan: Startup sequence initiated...")
|
421 |
|
422 |
try:
|
423 |
ptb_app = await setup_bot_config()
|
424 |
-
await ptb_app.initialize()
|
425 |
-
await ptb_app.start()
|
426 |
|
427 |
bot_info = await ptb_app.bot.get_me()
|
428 |
-
|
|
|
429 |
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
webhook_path = "/webhook"
|
435 |
-
full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
|
436 |
-
|
437 |
-
logger.info(f"Setting webhook to: {full_webhook_url}")
|
438 |
-
await asyncio.sleep(2.0)
|
439 |
try:
|
440 |
-
await ptb_app.bot.
|
441 |
-
|
442 |
-
allowed_updates=Update.ALL_TYPES,
|
443 |
-
drop_pending_updates=True
|
444 |
-
)
|
445 |
-
webhook_info = await ptb_app.bot.get_webhook_info()
|
446 |
-
logger.info(f"Webhook set: {webhook_info}")
|
447 |
except Exception as e:
|
448 |
-
logger.
|
449 |
|
450 |
logger.info("ASGI Lifespan: Startup complete.")
|
451 |
-
yield
|
452 |
|
453 |
except Exception as startup_err:
|
454 |
-
logger.critical(f"
|
455 |
-
|
456 |
finally:
|
457 |
logger.info("ASGI Lifespan: Shutdown sequence initiated...")
|
458 |
if ptb_app:
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
|
|
|
|
465 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
466 |
|
467 |
-
# --- Flask App Setup ---
|
468 |
-
flask_core_app = Flask(__name__)
|
469 |
|
470 |
-
|
471 |
-
def
|
472 |
"""Basic health check endpoint."""
|
473 |
-
bot_status = "
|
474 |
if ptb_app and ptb_app.bot:
|
475 |
-
|
476 |
-
|
477 |
|
478 |
-
|
479 |
-
async def webhook() -> Response:
|
480 |
"""Webhook endpoint called by Telegram."""
|
481 |
if not ptb_app:
|
482 |
-
|
|
483 |
|
484 |
try:
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
await ptb_app.process_update(update)
|
491 |
-
return Response(
|
492 |
|
493 |
except Exception as e:
|
494 |
-
logger.error(f"
|
495 |
-
|
|
|
|
|
496 |
|
497 |
# --- Create Starlette ASGI Application ---
|
498 |
app = Starlette(
|
499 |
-
debug=False,
|
500 |
lifespan=lifespan,
|
501 |
routes=[
|
502 |
-
|
|
|
503 |
]
|
504 |
)
|
505 |
-
logger.info("Starlette ASGI application created.")
|
506 |
|
507 |
-
# --- Development Server Execution Block ---
|
|
|
|
|
508 |
if __name__ == '__main__':
|
509 |
-
|
510 |
-
|
511 |
-
|
|
|
|
1 |
+
# main.py (Revised with Starlette-only routes and fixes)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
7 |
import html
|
8 |
import contextlib
|
9 |
import traceback
|
10 |
+
from typing import Optional, Dict, Any
|
11 |
|
12 |
# --- Frameworks ---
|
13 |
+
# Removed Flask imports
|
14 |
from starlette.applications import Starlette
|
15 |
+
from starlette.routing import Route # Changed from Mount
|
16 |
+
from starlette.responses import PlainTextResponse, JSONResponse, Response # Starlette responses
|
17 |
+
from starlette.requests import Request # Starlette request object
|
18 |
|
19 |
# --- Telegram Bot ---
|
20 |
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot
|
|
|
27 |
CallbackQueryHandler,
|
28 |
)
|
29 |
from telegram.constants import ParseMode
|
30 |
+
from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest, TelegramError
|
31 |
from telegram.request import HTTPXRequest
|
32 |
|
33 |
# --- Other Libraries ---
|
34 |
import httpx
|
35 |
+
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
36 |
import requests
|
37 |
from bs4 import BeautifulSoup
|
38 |
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
|
39 |
|
40 |
_apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
|
41 |
if _apify_token_exists:
|
42 |
from apify_client import ApifyClient
|
43 |
+
from apify_client.consts import ActorJobStatus
|
44 |
else:
|
45 |
+
ApifyClient = None # type: ignore # Make type checkers happy
|
46 |
+
|
47 |
|
48 |
# --- Logging Setup ---
|
49 |
logging.basicConfig(
|
50 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
51 |
+
level=logging.INFO # Changed default to INFO, DEBUG is very verbose
|
52 |
)
|
53 |
logging.getLogger("httpx").setLevel(logging.WARNING)
|
54 |
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
|
|
|
80 |
logger.info("Secret loading attempt finished.")
|
81 |
|
82 |
# --- Retry Decorator for Bot Operations ---
|
83 |
+
# Increased attempts slightly and added logging before sleep
|
84 |
+
# Explicitly catch TelegramError subclasses for retries
|
85 |
+
@retry(
|
86 |
+
stop=stop_after_attempt(4), # Increased slightly
|
87 |
+
wait=wait_exponential(multiplier=1, min=2, max=15), # Adjusted wait
|
88 |
+
retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), # Specific Telegram errors
|
89 |
+
before_sleep=before_sleep_log(logger, logging.WARNING), # Use tenacity logger
|
90 |
+
reraise=True # Reraise the exception if all retries fail
|
91 |
+
)
|
92 |
+
async def retry_bot_operation(func, *args, **kwargs):
|
93 |
+
"""Wrapper to retry bot operations with exponential backoff."""
|
94 |
+
# Note: This is now a wrapper function, not a decorator factory
|
95 |
+
# Call it like: await retry_bot_operation(bot.send_message, chat_id=..., text=...)
|
96 |
+
try:
|
97 |
+
return await func(*args, **kwargs)
|
98 |
+
except BadRequest as e:
|
99 |
+
# Don't retry on bad requests that are likely permanent (e.g., chat not found)
|
100 |
+
# unless it's a common transient issue like message not modified
|
101 |
+
if "message is not modified" in str(e).lower():
|
102 |
+
logger.warning(f"Ignoring 'message not modified' error: {e}")
|
103 |
+
return None # Or indicate success/no action needed
|
104 |
+
logger.error(f"BadRequest during bot operation (will not retry unless specific): {e}")
|
105 |
+
raise # Reraise non-retryable BadRequests
|
106 |
+
except TelegramError as e:
|
107 |
+
logger.warning(f"TelegramError during bot operation (will retry): {e}")
|
108 |
+
raise # Reraise retryable errors for tenacity
|
109 |
+
except Exception as e:
|
110 |
+
logger.error(f"Unexpected error during bot operation: {e}", exc_info=True)
|
111 |
+
raise # Reraise unexpected errors
|
112 |
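For reference, a minimal usage sketch of the wrapper above; the token and chat_id are placeholder values, not part of the original file.

import asyncio
from telegram import Bot

async def _demo_retry(token: str, chat_id: int) -> None:
    bot = Bot(token=token)
    # Transient Telegram errors raised inside are retried with backoff by the decorator above.
    await retry_bot_operation(bot.send_message, chat_id=chat_id, text="Hello from the retry wrapper")
    await bot.shutdown()

# asyncio.run(_demo_retry("123456:ABC-placeholder", 42))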
|
113 |
+
# --- Helper Functions ---
|
114 |
def is_youtube_url(url):
|
115 |
"""Checks if the URL is a valid YouTube video or shorts URL."""
|
116 |
+
# More robust regex to handle various youtube domain variations and query params
|
117 |
+
youtube_regex = re.compile(
|
118 |
+
r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
|
119 |
+
r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
|
120 |
+
r'([\w-]{11})' # Group 1: Video ID
|
121 |
+
r'(?:\S+)?', # Optional non-whitespace characters after ID
|
122 |
+
re.IGNORECASE)
|
123 |
+
match = youtube_regex.search(url)
|
124 |
logger.debug(f"is_youtube_url check for '{url}': {'Match found' if match else 'No match'}")
|
125 |
return bool(match)
|
126 |
|
127 |
def extract_youtube_id(url):
|
128 |
"""Extracts the YouTube video ID from a URL."""
|
129 |
+
# Use the same robust regex as is_youtube_url
|
130 |
+
youtube_regex = re.compile(
|
131 |
+
r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/'
|
132 |
+
r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.*&u=/watch\?v=)?'
|
133 |
+
r'([\w-]{11})' # Group 1: Video ID
|
134 |
+
r'(?:\S+)?',
|
135 |
+
re.IGNORECASE)
|
136 |
+
match = youtube_regex.search(url)
|
137 |
if match:
|
138 |
video_id = match.group(1)
|
139 |
logger.debug(f"Extracted YouTube ID '{video_id}' from URL: {url}")
|
|
|
142 |
logger.warning(f"Could not extract YouTube ID from URL: {url}")
|
143 |
return None
|
144 |
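A quick illustrative check of the two helpers above; the video URL and ID are sample values.

sample_url = "https://youtu.be/dQw4w9WgXcQ?t=43"
print(is_youtube_url(sample_url))                         # True  - youtu.be short links match
print(extract_youtube_id(sample_url))                     # "dQw4w9WgXcQ" - the 11-character ID
print(extract_youtube_id("https://example.com/article"))  # None  - not a YouTube URL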
|
145 |
+
# --- Content Fetching Functions ---
|
146 |
|
147 |
+
# Using httpx for async requests
|
148 |
+
async def fetch_url_content(url: str, timeout: int = 20) -> Optional[str]:
|
149 |
+
"""Fetches content from a URL using httpx asynchronously."""
|
150 |
+
headers = {
|
151 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
152 |
+
}
|
153 |
+
try:
|
154 |
+
async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
|
155 |
+
response = await client.get(url)
|
156 |
+
response.raise_for_status() # Raise an exception for bad status codes
|
157 |
+
# Detect encoding, fallback to UTF-8
|
158 |
+
response.encoding = response.apparent_encoding or 'utf-8'
|
159 |
+
return response.text
|
160 |
+
except httpx.HTTPStatusError as e:
|
161 |
+
logger.error(f"HTTP error fetching {url}: {e.response.status_code} - {e}")
|
162 |
+
except httpx.RequestError as e:
|
163 |
+
logger.error(f"Request error fetching {url}: {e}")
|
164 |
+
except Exception as e:
|
165 |
+
logger.error(f"Unexpected error fetching {url}: {e}", exc_info=True)
|
166 |
+
return None
|
167 |
+
|
168 |
+
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
|
169 |
+
"""Fetches YouTube transcript using Supadata API."""
|
170 |
+
if not api_key: return None
|
171 |
+
api_url = f"https://api.supadata.net/youtube/transcript?video_id={video_id}"
|
172 |
+
headers = {'X-API-Key': api_key, 'Accept': 'application/json'}
|
173 |
+
logger.info(f"Attempting transcript fetch via Supadata for {video_id}")
|
174 |
+
try:
|
175 |
+
async with httpx.AsyncClient(timeout=30.0) as client:
|
176 |
+
response = await client.get(api_url, headers=headers)
|
177 |
+
response.raise_for_status()
|
178 |
+
data = response.json()
|
179 |
+
if data and isinstance(data, list) and data[0].get("text"):
|
180 |
+
transcript = " ".join([item["text"] for item in data if "text" in item])
|
181 |
+
logger.info(f"Supadata transcript fetched successfully for {video_id} (length: {len(transcript)})")
|
182 |
+
return transcript
|
183 |
+
else:
|
184 |
+
logger.warning(f"Supadata response format unexpected or empty for {video_id}: {data}")
|
185 |
+
return None
|
186 |
+
except httpx.HTTPStatusError as e:
|
187 |
+
logger.error(f"Supadata API HTTP error for {video_id}: {e.response.status_code} - {e}")
|
188 |
+
except Exception as e:
|
189 |
+
logger.error(f"Error fetching transcript via Supadata for {video_id}: {e}", exc_info=True)
|
190 |
+
return None
|
191 |
+
|
192 |
+
async def get_transcript_via_apify(video_id: str, api_token: str) -> Optional[str]:
|
193 |
+
"""Fetches YouTube transcript using Apify YouTube Scraper Actor."""
|
194 |
+
if not ApifyClient or not api_token: return None
|
195 |
+
logger.info(f"Attempting transcript fetch via Apify for {video_id}")
|
196 |
+
try:
|
197 |
+
client = ApifyClient(api_token)
|
198 |
+
actor_call = client.actor("clockworks/youtube-scraper").call(
|
199 |
+
run_input={
|
200 |
+
"startUrls": [f"https://www.youtube.com/watch?v={video_id}"],
|
201 |
+
"maxResultStreams": 0,
|
202 |
+
"maxResults": 0,
|
203 |
+
"maxResultCommentStreams": 0,
|
204 |
+
"proxyConfiguration": {"useApifyProxy": True},
|
205 |
+
"subtitles": True, # Explicitly request subtitles/transcript
|
206 |
+
"extendOutputFunction": 'async ({ data, item, page, request, customData, Apify }) => { return item; }',
|
207 |
+
}
|
208 |
+
)
|
209 |
+
|
210 |
+
# Wait for the actor run to complete and fetch results
|
211 |
+
dataset_items = client.dataset(actor_call["defaultDatasetId"]).list_items().items
|
212 |
+
if dataset_items:
|
213 |
+
# Look for transcript data within the results
|
214 |
+
for item in dataset_items:
|
215 |
+
if 'subtitles' in item and isinstance(item['subtitles'], list) and len(item['subtitles']) > 0:
|
216 |
+
# Combine transcript lines, assuming standard format
|
217 |
+
transcript = " ".join(line.get('text', '') for line in item['subtitles'][0].get('lines', []))
|
218 |
+
if transcript.strip():
|
219 |
+
logger.info(f"Apify transcript fetched successfully for {video_id} (length: {len(transcript)})")
|
220 |
+
return transcript.strip()
|
221 |
+
logger.warning(f"Apify run completed for {video_id}, but no transcript found in results.")
|
222 |
+
else:
|
223 |
+
logger.warning(f"Apify run completed for {video_id}, but dataset was empty.")
|
224 |
+
|
225 |
+
except Exception as e:
|
226 |
+
logger.error(f"Error fetching transcript via Apify for {video_id}: {e}", exc_info=True)
|
227 |
+
return None
|
228 |
+
|
229 |
+
|
230 |
+
async def get_youtube_transcript(video_id: str, url: str, supadata_key: Optional[str], apify_token: Optional[str]) -> Optional[str]:
|
231 |
+
"""Tries different methods to get a YouTube transcript."""
|
232 |
+
transcript = None
|
233 |
+
|
234 |
+
# 1. Try Supadata API (if key exists)
|
235 |
+
if supadata_key:
|
236 |
+
transcript = await get_transcript_via_supadata(video_id, supadata_key)
|
237 |
+
if transcript: return transcript
|
238 |
+
|
239 |
+
# 2. Try youtube-transcript-api (Direct method)
|
240 |
+
logger.info(f"Attempting transcript fetch via youtube-transcript-api for {video_id}")
|
241 |
+
try:
|
242 |
+
# Run in executor to avoid blocking async loop
|
243 |
+
transcript_list = await asyncio.to_thread(YouTubeTranscriptApi.get_transcript, video_id)
|
244 |
+
transcript = " ".join([item['text'] for item in transcript_list])
|
245 |
+
logger.info(f"youtube-transcript-api transcript fetched successfully for {video_id} (length: {len(transcript)})")
|
246 |
+
return transcript
|
247 |
+
except (TranscriptsDisabled, NoTranscriptFound):
|
248 |
+
logger.warning(f"Transcripts disabled or unavailable via youtube-transcript-api for {video_id}.")
|
249 |
+
except Exception as e:
|
250 |
+
logger.error(f"Error using youtube-transcript-api for {video_id}: {e}")
|
251 |
+
|
252 |
+
# 3. Try Apify (if token exists and other methods failed)
|
253 |
+
if not transcript and apify_token:
|
254 |
+
transcript = await get_transcript_via_apify(video_id, apify_token)
|
255 |
+
if transcript: return transcript
|
256 |
+
|
257 |
+
logger.warning(f"Failed to retrieve transcript for YouTube video {video_id} using all available methods.")
|
258 |
+
return None
|
259 |
+
|
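The function above tries its sources in a fixed order (Supadata, then youtube-transcript-api, then Apify). A minimal sketch of calling it directly, with placeholder IDs:

async def _demo_transcript() -> None:
    # Keys may be None; those sources are simply skipped.
    text = await get_youtube_transcript(
        "dQw4w9WgXcQ", "https://youtu.be/dQw4w9WgXcQ",
        SUPADATA_API_KEY, APIFY_API_TOKEN
    )
    print(f"Transcript length: {len(text) if text else 0}")

# asyncio.run(_demo_transcript())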
260 |
+
async def get_website_content_via_requests(url: str) -> Optional[str]:
|
261 |
+
"""Fetches and extracts main text content from a website using BeautifulSoup."""
|
262 |
+
logger.info(f"Attempting website scrape via requests/BeautifulSoup for: {url}")
|
263 |
+
html_content = await fetch_url_content(url)
|
264 |
+
if not html_content:
|
265 |
+
return None
|
266 |
+
|
267 |
+
try:
|
268 |
+
# Run BeautifulSoup parsing in executor thread
|
269 |
+
def parse_html(content):
|
270 |
+
soup = BeautifulSoup(content, 'html.parser')
|
271 |
+
# Remove script and style elements
|
272 |
+
for script_or_style in soup(["script", "style", "nav", "footer", "aside"]):
|
273 |
+
script_or_style.decompose()
|
274 |
+
# Get text, strip whitespace, join lines
|
275 |
+
text = soup.get_text(separator='\n', strip=True)
|
276 |
+
lines = (line.strip() for line in text.splitlines())
|
277 |
+
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
278 |
+
text = '\n'.join(chunk for chunk in chunks if chunk)
|
279 |
+
return text
|
280 |
+
|
281 |
+
text_content = await asyncio.to_thread(parse_html, html_content)
|
282 |
+
|
283 |
+
if text_content and len(text_content) > 100: # Basic check for meaningful content
|
284 |
+
logger.info(f"Successfully scraped content via requests/BeautifulSoup for {url} (length: {len(text_content)})")
|
285 |
+
return text_content
|
286 |
+
else:
|
287 |
+
logger.warning(f"Scraping via requests/BeautifulSoup for {url} yielded minimal content (length: {len(text_content) if text_content else 0}).")
|
288 |
+
return None
|
289 |
+
except Exception as e:
|
290 |
+
logger.error(f"Error parsing website content with BeautifulSoup for {url}: {e}", exc_info=True)
|
291 |
+
return None
|
292 |
+
|
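The tag-stripping approach used inside parse_html above can be exercised on an inline snippet; this example is illustrative only and not part of the original file.

from bs4 import BeautifulSoup

snippet = "<html><body><nav>menu</nav><p>First paragraph.</p><script>track()</script></body></html>"
soup = BeautifulSoup(snippet, "html.parser")
for tag in soup(["script", "style", "nav", "footer", "aside"]):
    tag.decompose()  # drop non-content elements, mirroring parse_html
print(soup.get_text(separator="\n", strip=True))  # -> First paragraph.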
293 |
+
async def get_website_content_via_urltotext_api(url: str, api_key: str) -> Optional[str]:
|
294 |
+
"""Fetches website content using the UrlToText API."""
|
295 |
+
if not api_key: return None
|
296 |
+
api_endpoint = "https://api.urltotext.ai/text"
|
297 |
+
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
|
298 |
+
payload = {"url": url, "text_only": True} # Requesting only text content
|
299 |
+
logger.info(f"Attempting website content fetch via UrlToText API for: {url}")
|
300 |
+
|
301 |
+
try:
|
302 |
+
async with httpx.AsyncClient(timeout=45.0) as client: # Increased timeout
|
303 |
+
response = await client.post(api_endpoint, headers=headers, json=payload)
|
304 |
+
response.raise_for_status()
|
305 |
+
data = response.json()
|
306 |
+
if "text" in data and data["text"]:
|
307 |
+
content = data["text"]
|
308 |
+
logger.info(f"Successfully fetched content via UrlToText API for {url} (length: {len(content)})")
|
309 |
+
return content
|
310 |
+
else:
|
311 |
+
logger.warning(f"UrlToText API response did not contain text for {url}. Response: {data}")
|
312 |
+
return None
|
313 |
+
except httpx.HTTPStatusError as e:
|
314 |
+
logger.error(f"UrlToText API HTTP error for {url}: {e.response.status_code} - {e}")
|
315 |
+
except Exception as e:
|
316 |
+
logger.error(f"Error fetching content via UrlToText API for {url}: {e}", exc_info=True)
|
317 |
+
return None
|
318 |
+
|
319 |
+
# --- Summarization Function ---
|
320 |
+
async def generate_summary(content: str, summary_type: str, api_key: Optional[str]) -> str:
|
321 |
+
"""Generates a summary using OpenRouter API."""
|
322 |
+
if not api_key:
|
323 |
+
return "Error: OpenRouter API key is not configured."
|
324 |
+
if not content:
|
325 |
+
return "Error: No content provided to summarize."
|
326 |
+
|
327 |
+
# Basic check for content length
|
328 |
+
if len(content) < 50:
|
329 |
+
return "The provided content is too short to summarize effectively."
|
330 |
+
|
331 |
+
# Truncate content if too long (adjust limit as needed based on model context window)
|
332 |
+
# This limit depends heavily on the chosen model. Claude Sonnet 3.5 has 200k tokens.
|
333 |
+
# Let's aim for roughly 100k characters as a safe-ish limit for many models.
|
334 |
+
max_chars = 100000
|
335 |
+
if len(content) > max_chars:
|
336 |
+
logger.warning(f"Content length ({len(content)}) exceeds max_chars ({max_chars}), truncating.")
|
337 |
+
content = content[:max_chars]
|
338 |
+
|
339 |
+
prompt_template = """
|
340 |
+
Please summarize the following text.
|
341 |
+
Provide the summary in {format_style} format.
|
342 |
+
|
343 |
+
Text to summarize:
|
344 |
+
---
|
345 |
+
{text}
|
346 |
+
---
|
347 |
+
|
348 |
+
Summary ({format_style}):
|
349 |
+
"""
|
350 |
+
format_style = "a concise paragraph" if summary_type == "paragraph" else "bullet points (using * or - for each point)"
|
351 |
+
|
352 |
+
prompt = prompt_template.format(text=content, format_style=format_style)
|
353 |
+
|
354 |
+
# Recommended model: claude-3.5-sonnet-20240620, but allow others
|
355 |
+
model_to_use = os.environ.get("OPENROUTER_MODEL", "anthropic/claude-3.5-sonnet") # Default to Claude 3.5 Sonnet
|
356 |
+
|
357 |
+
logger.info(f"Sending request to OpenRouter (Model: {model_to_use}) for {summary_type} summary.")
|
358 |
+
|
359 |
+
try:
|
360 |
+
async with httpx.AsyncClient(timeout=120.0) as client: # Increased timeout for generation
|
361 |
+
response = await client.post(
|
362 |
+
url="https://openrouter.ai/api/v1/chat/completions",
|
363 |
+
headers={
|
364 |
+
"Authorization": f"Bearer {api_key}",
|
365 |
+
"Content-Type": "application/json"
|
366 |
+
},
|
367 |
+
json={
|
368 |
+
"model": model_to_use,
|
369 |
+
"messages": [{"role": "user", "content": prompt}],
|
370 |
+
"max_tokens": 1024, # Adjust as needed
|
371 |
+
},
|
372 |
+
)
|
373 |
+
response.raise_for_status()
|
374 |
+
data = response.json()
|
375 |
+
|
376 |
+
if data.get("choices") and len(data["choices"]) > 0:
|
377 |
+
summary = data["choices"][0].get("message", {}).get("content", "").strip()
|
378 |
+
if summary:
|
379 |
+
logger.info(f"Summary generated successfully (length: {len(summary)})")
|
380 |
+
# Simple Markdown sanitization (escape potential conflicts)
|
381 |
+
summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
|
382 |
+
return summary
|
383 |
+
else:
|
384 |
+
logger.error("OpenRouter response successful, but summary content is empty.")
|
385 |
+
return "Sorry, the AI generated an empty summary. Please try again."
|
386 |
+
else:
|
387 |
+
logger.error(f"OpenRouter response format unexpected: {data}")
|
388 |
+
return "Sorry, I received an unexpected response from the summarization service."
|
389 |
+
|
390 |
+
except httpx.HTTPStatusError as e:
|
391 |
+
error_body = ""
|
392 |
+
try:
|
393 |
+
error_body = e.response.text
|
394 |
+
except Exception:
|
395 |
+
pass # Ignore if reading body fails
|
396 |
+
logger.error(f"OpenRouter API HTTP error: {e.response.status_code} - {e}. Response body: {error_body}")
|
397 |
+
return f"Sorry, there was an error communicating with the summarization service (HTTP {e.response.status_code})."
|
398 |
+
except Exception as e:
|
399 |
+
logger.error(f"Error generating summary via OpenRouter: {e}", exc_info=True)
|
400 |
+
return "Sorry, an unexpected error occurred while generating the summary."
|
401 |
+
|
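A small local harness for the summariser above; the sample text is just padding to clear the 50-character minimum, and OPENROUTER_API_KEY must be set for a real call.

async def _demo_summary() -> None:
    sample_text = "Starlette is a lightweight ASGI framework for building async web services. " * 4
    result = await generate_summary(sample_text, "points", OPENROUTER_API_KEY)
    print(result)

# asyncio.run(_demo_summary())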
402 |
+
|
403 |
+
# --- Background Task Processing ---
|
404 |
async def process_summary_task(
|
405 |
user_id: int,
|
406 |
chat_id: int,
|
407 |
+
message_id_to_edit: Optional[int], # Can be None if original message couldn't be edited
|
408 |
url: str,
|
409 |
summary_type: str,
|
410 |
+
bot_token: str
|
411 |
) -> None:
|
412 |
"""Handles the actual fetching and summarization in a background task."""
|
413 |
+
task_id = f"{user_id}-{message_id_to_edit or 'new'}"
|
414 |
logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
415 |
+
|
416 |
# Create a new bot instance for this task
|
417 |
+
# Use longer timeouts for background tasks which might involve heavy network I/O
|
418 |
+
custom_request = HTTPXRequest(
|
419 |
+
connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0, http_version="1.1"
|
420 |
+
)
|
421 |
+
bot = Bot(token=bot_token, request=custom_request)
|
422 |
+
|
423 |
+
content = None
|
424 |
+
user_feedback_message = None
|
425 |
+
success = False
|
426 |
+
final_summary = ""
|
427 |
+
status_message_id = message_id_to_edit # Start assuming we can edit the original
|
428 |
+
|
429 |
try:
|
430 |
+
# --- Inform User Processing Has Started (Edit original or send new) ---
|
431 |
+
processing_message_text = f"⏳ Working on your '{summary_type}' summary for:\n`{url}`\n\n_(Fetching & summarizing...)_"
|
432 |
+
|
433 |
+
if status_message_id:
|
434 |
try:
|
435 |
+
await retry_bot_operation(
|
436 |
+
bot.edit_message_text,
|
437 |
chat_id=chat_id,
|
438 |
+
message_id=status_message_id,
|
439 |
+
text=processing_message_text,
|
440 |
+
parse_mode=ParseMode.MARKDOWN,
|
441 |
+
reply_markup=None # Remove buttons
|
442 |
)
|
443 |
+
logger.debug(f"[Task {task_id}] Successfully edited message {status_message_id} to 'Processing'")
|
444 |
+
except Exception as e:
|
445 |
+
logger.warning(f"[Task {task_id}] Could not edit original message {status_message_id}: {e}. Will send a new status message.")
|
446 |
+
status_message_id = None # Flag to send a new message
|
447 |
+
|
448 |
+
if not status_message_id: # If editing failed or wasn't possible
|
449 |
+
try:
|
450 |
+
status_message = await retry_bot_operation(
|
451 |
+
bot.send_message,
|
452 |
+
chat_id=chat_id,
|
453 |
+
text=processing_message_text,
|
454 |
+
parse_mode=ParseMode.MARKDOWN
|
455 |
+
)
|
456 |
+
status_message_id = status_message.message_id
|
457 |
+
logger.debug(f"[Task {task_id}] Sent new status message {status_message_id}")
|
458 |
+
except Exception as e:
|
459 |
+
logger.error(f"[Task {task_id}] Failed to send new status message: {e}")
|
460 |
+
# Cannot proceed without informing user
|
461 |
+
raise RuntimeError("Failed to send initial status message") from e
|
462 |
|
463 |
# --- Main Content Fetching and Summarization ---
|
464 |
try:
|
465 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
466 |
|
467 |
# --- Determine Content Type and Fetch ---
|
468 |
is_yt = is_youtube_url(url)
|
|
|
472 |
video_id = extract_youtube_id(url)
|
473 |
if video_id:
|
474 |
logger.info(f"[Task {task_id}] Fetching YouTube transcript for {video_id}")
|
475 |
+
content = await get_youtube_transcript(video_id, url, SUPADATA_API_KEY, APIFY_API_TOKEN)
|
476 |
if not content:
|
477 |
+
user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be disabled or unavailable."
|
478 |
+
else:
|
479 |
+
user_feedback_message = "⚠️ Couldn't extract a valid YouTube video ID from the link."
|
480 |
else:
|
481 |
logger.info(f"[Task {task_id}] Attempting website scrape for: {url}")
|
482 |
content = await get_website_content_via_requests(url)
|
483 |
if not content and URLTOTEXT_API_KEY:
|
484 |
+
logger.info(f"[Task {task_id}] Basic scrape failed or insufficient, trying UrlToText API...")
|
485 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
486 |
content = await get_website_content_via_urltotext_api(url, URLTOTEXT_API_KEY)
|
487 |
+
|
488 |
+
if not content:
|
489 |
+
user_feedback_message = "⚠️ Sorry, I couldn't fetch or extract meaningful content from that website."
|
490 |
|
491 |
# --- Generate Summary if Content Was Fetched ---
|
492 |
if content:
|
493 |
+
logger.info(f"[Task {task_id}] Content fetched (length: {len(content)}). Generating '{summary_type}' summary.")
|
494 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
|
|
|
|
495 |
|
496 |
+
final_summary = await generate_summary(content, summary_type, OPENROUTER_API_KEY)
|
497 |
+
|
498 |
+
if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
|
499 |
+
user_feedback_message = f"⚠️ {final_summary}" # Use the error from generate_summary
|
500 |
else:
|
501 |
+
success = True # Summary generated successfully
|
502 |
+
|
503 |
+
# If content fetching failed initially, user_feedback_message is already set
|
504 |
|
505 |
except Exception as e:
|
506 |
+
logger.error(f"[Task {task_id}] Error during content fetching or summarization: {e}", exc_info=True)
|
507 |
user_feedback_message = "❌ An unexpected error occurred while processing your request."
|
508 |
|
509 |
+
# --- Send Final Result or Error ---
|
510 |
+
if success and final_summary:
|
511 |
+
# Split summary if it's too long for one message
|
512 |
+
max_length = 4096
|
513 |
+
summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
|
514 |
+
|
515 |
+
# Send the first part (or the only part)
|
516 |
+
await retry_bot_operation(
|
517 |
+
bot.send_message,
|
518 |
+
chat_id=chat_id,
|
519 |
+
text=summary_parts[0],
|
520 |
+
parse_mode=ParseMode.MARKDOWN,
|
521 |
+
link_preview_options={'is_disabled': True} # Changed to dict format for PTB v21+
|
522 |
+
)
|
523 |
+
# Send subsequent parts if any
|
524 |
+
for part in summary_parts[1:]:
|
525 |
+
await asyncio.sleep(0.5) # Small delay between parts
|
526 |
+
await retry_bot_operation(
|
527 |
+
bot.send_message,
|
528 |
+
chat_id=chat_id,
|
529 |
+
text=part,
|
530 |
+
parse_mode=ParseMode.MARKDOWN,
|
531 |
+
link_preview_options={'is_disabled': True}
|
532 |
+
)
|
533 |
+
logger.info(f"[Task {task_id}] Successfully sent summary ({len(summary_parts)} parts).")
|
534 |
+
|
535 |
+
elif user_feedback_message: # Handle errors (either from fetching or summarization)
|
536 |
+
logger.warning(f"[Task {task_id}] Sending feedback/error message: {user_feedback_message}")
|
537 |
+
await retry_bot_operation(
|
538 |
+
bot.send_message,
|
539 |
+
chat_id=chat_id,
|
540 |
+
text=user_feedback_message,
|
541 |
+
link_preview_options={'is_disabled': True}
|
542 |
+
)
|
543 |
+
|
544 |
+
else: # Should not happen, but safety net
|
545 |
+
logger.error(f"[Task {task_id}] Reached end of task without success or specific error message.")
|
546 |
+
await retry_bot_operation(
|
547 |
+
bot.send_message,
|
548 |
+
chat_id=chat_id,
|
549 |
+
text="❓ Something went wrong, but no specific error was identified.",
|
550 |
+
link_preview_options={'is_disabled': True}
|
551 |
+
)
|
552 |
+
|
553 |
|
554 |
except Exception as e:
|
555 |
+
logger.critical(f"[Task {task_id}] Critical error within task processing: {e}", exc_info=True)
|
556 |
try:
|
557 |
+
await retry_bot_operation(
|
558 |
+
bot.send_message,
|
559 |
chat_id=chat_id,
|
560 |
+
text="❌ A critical internal error occurred. Please report this if it persists."
|
561 |
)
|
562 |
except Exception:
|
563 |
+
logger.exception(f"[Task {task_id}] Failed even to send critical error message.")
|
564 |
finally:
|
565 |
+
# --- Clean up Status Message ---
|
566 |
+
if status_message_id:
|
567 |
+
try:
|
568 |
+
await retry_bot_operation(bot.delete_message, chat_id=chat_id, message_id=status_message_id)
|
569 |
+
logger.debug(f"[Task {task_id}] Deleted status message {status_message_id}")
|
570 |
+
except Exception as e:
|
571 |
+
logger.warning(f"[Task {task_id}] Failed to delete status message {status_message_id}: {e}")
|
572 |
+
|
573 |
# Ensure bot session is closed
|
574 |
+
if bot and bot.session:
|
575 |
+
try:
|
576 |
+
await bot.shutdown() # Use bot.shutdown() which includes closing the session
|
577 |
+
logger.debug(f"[Task {task_id}] Background bot instance shut down.")
|
578 |
+
except Exception as e:
|
579 |
+
logger.warning(f"[Task {task_id}] Error shutting down background bot instance: {e}")
|
580 |
|
581 |
logger.info(f"[Task {task_id}] Task completed. Success: {success}")
|
582 |
|
583 |
+
|
584 |
# --- Telegram Bot Handlers ---
|
585 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
586 |
"""Handles the /start command."""
|
587 |
user = update.effective_user
|
588 |
+
if not user or not update.message: return
|
589 |
logger.info(f"User {user.id} initiated /start.")
|
590 |
+
mention = user.mention_html() # mention_html is safer
|
591 |
start_message = (
|
592 |
f"👋 Hello {mention}!\n\n"
|
593 |
"I can summarise YouTube videos or web articles for you.\n\n"
|
|
|
599 |
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
600 |
"""Handles the /help command."""
|
601 |
user = update.effective_user
|
602 |
+
if not user or not update.message: return
|
603 |
+
logger.info(f"User {user.id} requested /help.")
|
604 |
help_text = (
|
605 |
"**How to Use Me:**\n"
|
606 |
"1. Send me a direct link (URL) to a YouTube video or a web article.\n"
|
|
|
609 |
"4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
|
610 |
"**Important Notes:**\n"
|
611 |
"- **YouTube:** Getting transcripts can sometimes fail if they are disabled or unavailable.\n"
|
612 |
+
"- **Websites:** I try my best, but complex or JavaScript-heavy sites might not work perfectly.\n"
|
613 |
+
"- **AI Summaries:** The AI aims for accuracy but may occasionally miss nuances or make errors.\n"
|
614 |
+
"- **Length Limits:** Very long videos or articles might be truncated before summarization.\n\n"
|
615 |
"Just send a link to get started!"
|
616 |
)
|
617 |
+
# Use reply_markdown_v2 for safer Markdown parsing if needed, but MARKDOWN should suffice here
|
618 |
await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
|
619 |
|
620 |
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
|
624 |
user = update.effective_user
|
625 |
if not user: return
|
626 |
|
627 |
+
# Improved URL regex (simplified for matching, not validation)
|
628 |
+
url_pattern = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
|
629 |
match = re.search(url_pattern, message_text)
|
630 |
|
631 |
if match:
|
632 |
url = match.group(0)
|
633 |
+
# Basic cleanup: remove trailing characters like periods or commas if they likely aren't part of URL
|
634 |
+
url = re.sub(r'[.,!?)\]>]+$', '', url)
|
635 |
+
logger.info(f"User {user.id} sent potential URL: {url}")
|
636 |
+
|
637 |
+
# Store URL and the original message ID for potential editing
|
638 |
context.user_data['url_to_summarize'] = url
|
639 |
+
context.user_data['original_message_id'] = update.message.message_id # Store for potential edit
|
640 |
|
641 |
keyboard = [
|
642 |
[
|
643 |
+
InlineKeyboardButton("📜 Paragraph", callback_data="paragraph"), # Shortened labels
|
644 |
InlineKeyboardButton("🔹 Bullet Points", callback_data="points")
|
645 |
]
|
646 |
]
|
|
|
652 |
link_preview_options={'is_disabled': True}
|
653 |
)
|
654 |
elif not message_text.startswith('/'):
|
655 |
+
# Avoid replying to non-URL, non-command text to prevent noise
|
656 |
+
logger.debug(f"User {user.id} sent non-URL, non-command text: '{message_text[:50]}...'")
|
657 |
+
# Optional: Send a hint if it looks like they *tried* to send a URL
|
658 |
+
if "http" in message_text or "www." in message_text:
|
659 |
+
await update.message.reply_text("Hmm, that looks like it might be a link, but please ensure it starts with `http://` or `https://` and is a valid URL.")
|
660 |
+
# else: do nothing
|
661 |
|
662 |
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
663 |
"""Handles button presses for summary type selection."""
|
664 |
query = update.callback_query
|
665 |
+
if not query or not query.message or not query.from_user:
|
666 |
+
logger.warning("Callback query received without essential data.")
|
667 |
+
if query: await query.answer() # Try to answer anyway
|
|
|
|
|
668 |
return
|
669 |
|
|
|
|
|
670 |
user = query.from_user
|
671 |
summary_type = query.data
|
|
|
672 |
query_id = query.id
|
673 |
|
674 |
+
# --- Acknowledge button press ---
|
675 |
+
# This is where the original error occurred. Trying this early.
|
676 |
+
try:
|
677 |
+
await query.answer()
|
678 |
+
logger.debug(f"Acknowledged callback query {query_id} from user {user.id}")
|
679 |
+
except Exception as e:
|
680 |
+
# Log the error but try to continue, as acknowledging isn't strictly critical
|
681 |
+
logger.error(f"Error answering callback query {query_id} from user {user.id}: {e}", exc_info=True)
|
682 |
+
# The original NetworkError might still happen here in problematic environments.
|
683 |
+
# If it persists, it points to deeper issues with httpx/anyio/uvloop/PTB interaction.
|
684 |
+
|
685 |
+
# --- Retrieve URL and Original Message ID ---
|
686 |
+
url = context.user_data.get('url_to_summarize')
|
687 |
+
# Use the message ID from the callback query's message - this IS the message with buttons
|
688 |
+
message_id_to_edit = query.message.message_id
|
689 |
+
|
690 |
+
logger.info(f"User {user.id} chose summary type '{summary_type}' for URL associated with message {message_id_to_edit}")
|
691 |
|
692 |
if not url:
|
693 |
+
logger.warning(f"No URL found in user_data for user {user.id} (callback query {query_id}). Editing message.")
|
694 |
try:
|
695 |
+
await query.edit_message_text(text="⚠️ Oops! I couldn't find the link associated with this request. Please send the link again.")
|
696 |
except Exception as e:
|
697 |
+
logger.error(f"Failed to edit message to show 'URL not found' error: {e}")
|
698 |
return
|
699 |
|
700 |
+
# --- Clear user_data and schedule task ---
|
701 |
context.user_data.pop('url_to_summarize', None)
|
702 |
+
context.user_data.pop('original_message_id', None) # Clear stored ID
|
703 |
|
704 |
+
if not TELEGRAM_TOKEN:
|
705 |
+
logger.critical("TELEGRAM_TOKEN is missing, cannot start background task!")
|
706 |
+
try:
|
707 |
+
await query.edit_message_text(text="❌ Internal configuration error. Cannot process request.")
|
708 |
+
except Exception: pass
|
709 |
+
return
|
710 |
+
|
711 |
+
logger.info(f"Scheduling background task for user {user.id}, chat {query.message.chat_id}, message {message_id_to_edit}, type {summary_type}")
|
712 |
asyncio.create_task(
|
713 |
process_summary_task(
|
714 |
user_id=user.id,
|
715 |
chat_id=query.message.chat_id,
|
716 |
+
message_id_to_edit=message_id_to_edit, # Pass the ID of the message with buttons
|
717 |
url=url,
|
718 |
summary_type=summary_type,
|
719 |
+
bot_token=TELEGRAM_TOKEN # Pass token
|
720 |
),
|
721 |
+
name=f"SummaryTask-{user.id}-{message_id_to_edit}"
|
722 |
)
|
723 |
|
724 |
+
|
725 |
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
|
726 |
"""Log Errors caused by Updates or background tasks."""
|
727 |
+
logger.error("Exception while handling an update:", exc_info=context.error)
|
728 |
+
|
729 |
+
# Optionally, inform the user about backend errors if appropriate
|
730 |
+
# Be careful not to spam users if the error is persistent
|
731 |
+
# if isinstance(context.error, (NetworkError, TimedOut)):
|
732 |
+
# # Potentially inform user about temporary issues
|
733 |
+
# pass
|
734 |
+
# elif isinstance(context.error, TelegramError):
|
735 |
+
# # Handle specific Telegram API errors if needed
|
736 |
+
# pass
|
737 |
+
|
738 |
|
739 |
# --- Bot Setup Function ---
|
740 |
async def setup_bot_config() -> Application:
|
|
|
743 |
if not TELEGRAM_TOKEN:
|
744 |
raise ValueError("TELEGRAM_TOKEN environment variable not set.")
|
745 |
|
746 |
+
# Configure httpx client for PTB (timeouts are important)
|
747 |
custom_request = HTTPXRequest(
|
748 |
connect_timeout=10.0,
|
749 |
+
read_timeout=30.0, # Read timeout for API calls like getMe, getUpdates etc.
|
750 |
write_timeout=30.0,
|
751 |
+
pool_timeout=60.0, # Allow keeping connections open longer
|
752 |
+
http_version="1.1" # Stick to 1.1 for wider compatibility
|
753 |
)
|
754 |
|
755 |
application = (
|
756 |
Application.builder()
|
757 |
.token(TELEGRAM_TOKEN)
|
758 |
.request(custom_request)
|
759 |
+
# Consider connection_pool_size if expecting high concurrency
|
760 |
+
# .connection_pool_size(10)
|
761 |
.build()
|
762 |
)
|
763 |
|
764 |
+
# Add handlers
|
765 |
application.add_handler(CommandHandler("start", start))
|
766 |
application.add_handler(CommandHandler("help", help_command))
|
767 |
+
# Use TEXT filter combined with URL entity/filter for better targeting?
|
768 |
+
# For simplicity, keeping the regex check inside the handler for now.
|
769 |
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
|
770 |
application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
|
771 |
+
|
772 |
+
# Add error handler
|
773 |
application.add_error_handler(error_handler)
|
774 |
|
775 |
logger.info("Telegram application handlers configured.")
|
776 |
return application
|
777 |
|
778 |
+
# --- ASGI Lifespan Context Manager (Mostly Unchanged) ---
|
779 |
@contextlib.asynccontextmanager
|
780 |
async def lifespan(app: Starlette):
|
781 |
"""Handles PTB startup and shutdown during ASGI lifespan."""
|
782 |
global ptb_app
|
783 |
logger.info("ASGI Lifespan: Startup sequence initiated...")
|
784 |
|
785 |
+
if not TELEGRAM_TOKEN:
|
786 |
+
logger.critical("TELEGRAM_TOKEN is not set. Bot cannot start.")
|
787 |
+
raise RuntimeError("Telegram token missing.")
|
788 |
+
|
789 |
+
bot_info_text = "Bot info not available yet."
|
790 |
try:
|
791 |
ptb_app = await setup_bot_config()
|
792 |
+
await ptb_app.initialize() # Initialize application resources
|
|
|
793 |
|
794 |
bot_info = await ptb_app.bot.get_me()
|
795 |
+
bot_info_text = f"@{bot_info.username} (ID: {bot_info.id})"
|
796 |
+
logger.info(f"Bot initialized: {bot_info_text}")
|
797 |
|
798 |
+
# Check for existing webhook and potentially delete it before setting a new one
|
799 |
+
current_webhook_info = await ptb_app.bot.get_webhook_info()
|
800 |
+
if current_webhook_info and current_webhook_info.url:
|
801 |
+
logger.info(f"Found existing webhook: {current_webhook_info.url}. Attempting to delete it.")
|
802 |
try:
|
803 |
+
await ptb_app.bot.delete_webhook(drop_pending_updates=True)
|
804 |
+
logger.info("Existing webhook deleted successfully.")
|
805 |
except Exception as e:
|
806 |
+
logger.warning(f"Could not delete existing webhook: {e}")
|
807 |
+
await asyncio.sleep(1) # Short pause before setting new one
|
808 |
+
|
809 |
+
|
810 |
+
# Determine Webhook URL from Hugging Face environment variables
|
811 |
+
space_host = os.environ.get("SPACE_HOST")
|
812 |
+
webhook_path = "/webhook" # Keep it simple
|
813 |
+
full_webhook_url = None
|
814 |
+
|
815 |
+
if space_host:
|
816 |
+
# Ensure it starts with https://
|
817 |
+
if not space_host.startswith("https://"):
|
818 |
+
# HF usually provides full host like 'user-repo.hf.space'
|
819 |
+
# Prepend https:// if missing
|
820 |
+
if not space_host.startswith("http"):
|
821 |
+
full_webhook_url = f"https://{space_host.rstrip('/')}{webhook_path}"
|
822 |
+
else: # If it starts with http://, replace it (HF uses https)
|
823 |
+
full_webhook_url = f"https://{space_host.split('://')[1].rstrip('/')}{webhook_path}"
|
824 |
+
|
825 |
+
if full_webhook_url:
|
826 |
+
logger.info(f"Attempting to set webhook to: {full_webhook_url}")
|
827 |
+
await asyncio.sleep(2.0) # Give network time to settle
|
828 |
+
try:
|
829 |
+
await ptb_app.bot.set_webhook(
|
830 |
+
url=full_webhook_url,
|
831 |
+
allowed_updates=Update.ALL_TYPES, # Or specify types like ['message', 'callback_query']
|
832 |
+
drop_pending_updates=True,
|
833 |
+
secret_token=os.environ.get("WEBHOOK_SECRET") # Optional: Add a secret token for security
|
834 |
+
)
|
835 |
+
webhook_info = await ptb_app.bot.get_webhook_info()
|
836 |
+
logger.info(f"Webhook successfully set: URL='{webhook_info.url}', Pending={webhook_info.pending_update_count}")
|
837 |
+
# Start PTB's internal runners *after* setting the webhook
|
838 |
+
await ptb_app.start()
|
839 |
+
logger.info("PTB Application started (webhook mode). Ready for updates.")
|
840 |
+
except Exception as e:
|
841 |
+
logger.error(f"FATAL: Failed to set webhook to {full_webhook_url}: {e}", exc_info=True)
|
842 |
+
# Decide how to handle failure: maybe try polling or just exit?
|
843 |
+
# For HF Spaces webhook is preferred. Raising error might be best.
|
844 |
+
raise RuntimeError(f"Failed to set webhook: {e}") from e
|
845 |
+
else:
|
846 |
+
logger.warning("Could not construct valid HTTPS webhook URL from SPACE_HOST.")
|
847 |
+
# Fallback or error needed here? For HF, webhook is essential.
|
848 |
+
raise RuntimeError("Webhook URL could not be determined.")
|
849 |
+
else:
|
850 |
+
logger.warning("SPACE_HOST environment variable not found. Cannot set webhook.")
|
851 |
+
# Maybe fall back to polling for local dev? Not ideal for HF.
|
852 |
+
# logger.info("Starting PTB in polling mode (no SPACE_HOST found).")
|
853 |
+
# await ptb_app.start() # Start runners
|
854 |
+
# application.run_polling(drop_pending_updates=True) # Start polling - BLOCKING! Not suitable for ASGI app.
|
855 |
+
raise RuntimeError("SPACE_HOST env var missing, cannot run in webhook mode.")
|
856 |
+
|
857 |
|
858 |
logger.info("ASGI Lifespan: Startup complete.")
|
859 |
+
yield # Application runs here
|
860 |
|
861 |
except Exception as startup_err:
|
862 |
+
logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
|
863 |
+
# Ensure cleanup happens even if startup fails partially
|
864 |
+
if ptb_app and ptb_app.running:
|
865 |
+
await ptb_app.stop()
|
866 |
+
if ptb_app:
|
867 |
+
await ptb_app.shutdown()
|
868 |
+
raise # Reraise the error to stop the ASGI server
|
869 |
finally:
|
870 |
logger.info("ASGI Lifespan: Shutdown sequence initiated...")
|
871 |
if ptb_app:
|
872 |
+
if ptb_app.running:
|
873 |
+
logger.info("Stopping PTB application...")
|
874 |
+
await ptb_app.stop()
|
875 |
+
logger.info("Shutting down PTB application...")
|
876 |
+
await ptb_app.shutdown()
|
877 |
+
logger.info("PTB Application shut down gracefully.")
|
878 |
+
else:
|
879 |
+
logger.info("PTB application was not initialized or startup failed.")
|
880 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
881 |
|
|
|
|
|
882 |
|
883 |
+
# --- Starlette Route Handlers ---
|
884 |
+
async def health_check(request: Request) -> PlainTextResponse:
|
885 |
"""Basic health check endpoint."""
|
886 |
+
bot_status = "Not Initialized"
|
887 |
if ptb_app and ptb_app.bot:
|
888 |
+
try:
|
889 |
+
# Perform a lightweight check like get_me again, or use a flag
|
890 |
+
if ptb_app.running:
|
891 |
+
bot_info = await ptb_app.bot.get_me() # Cached usually
|
892 |
+
bot_status = f"Running (@{bot_info.username})"
|
893 |
+
else:
|
894 |
+
bot_status = "Initialized but not running"
|
895 |
+
except Exception as e:
|
896 |
+
bot_status = f"Error checking status: {e}"
|
897 |
+
|
898 |
+
return PlainTextResponse(f"Telegram Bot Summarizer - Status: {bot_status}")
|
899 |
|
900 |
+
async def telegram_webhook(request: Request) -> Response:
|
|
|
901 |
"""Webhook endpoint called by Telegram."""
|
902 |
if not ptb_app:
|
903 |
+
logger.error("Webhook received but PTB application not initialized.")
|
904 |
+
return PlainTextResponse('Bot not initialized', status_code=503)
|
905 |
+
if not ptb_app.running:
|
906 |
+
logger.warning("Webhook received but PTB application not running.")
|
907 |
+
# Maybe return 200 OK anyway so Telegram doesn't retry too much?
|
908 |
+
# Or 503 Service Unavailable
|
909 |
+
return PlainTextResponse('Bot initialized but not running', status_code=503)
|
910 |
|
911 |
try:
|
912 |
+
# Check secret token if configured
|
913 |
+
secret_token = os.environ.get("WEBHOOK_SECRET")
|
914 |
+
if secret_token:
|
915 |
+
if request.headers.get("X-Telegram-Bot-Api-Secret-Token") != secret_token:
|
916 |
+
logger.warning("Webhook received with invalid secret token.")
|
917 |
+
return Response(status_code=403) # Forbidden
|
918 |
+
|
919 |
+
# Process the update
|
920 |
+
update_data = await request.json()
|
921 |
+
update = Update.de_json(data=update_data, bot=ptb_app.bot)
|
922 |
+
logger.debug(f"Processing update_id: {update.update_id}")
|
923 |
await ptb_app.process_update(update)
|
924 |
+
return Response(status_code=200) # OK
|
925 |
|
926 |
+
except json.JSONDecodeError:
|
927 |
+
logger.error("Webhook received invalid JSON.")
|
928 |
+
return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
|
929 |
except Exception as e:
|
930 |
+
logger.error(f"Error processing webhook update: {e}", exc_info=True)
|
931 |
+
# Return 200 OK even on errors to prevent Telegram retries if the error is in processing logic
|
932 |
+
# Alternatively return 500 if it's a server-side issue. 200 is often safer for webhooks.
|
933 |
+
return Response(status_code=200) # OK despite error
|
934 |
|
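For local testing of the webhook route above, a delivery can be simulated with httpx; the update payload and secret below are placeholders, and the secret header is only checked when WEBHOOK_SECRET is set.

import httpx

fake_update = {
    "update_id": 1,
    "message": {"message_id": 1, "date": 0, "chat": {"id": 42, "type": "private"}, "text": "hi"},
}
resp = httpx.post(
    "http://localhost:8080/webhook",
    json=fake_update,
    headers={"X-Telegram-Bot-Api-Secret-Token": "placeholder-secret"},
)
print(resp.status_code)  # expected 200, or 403 if the secret does not match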
935 |
# --- Create Starlette ASGI Application ---
|
936 |
app = Starlette(
|
937 |
+
debug=False, # Set debug based on environment if needed
|
938 |
lifespan=lifespan,
|
939 |
routes=[
|
940 |
+
Route("/", endpoint=health_check, methods=["GET"]),
|
941 |
+
Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
|
942 |
]
|
943 |
)
|
944 |
+
logger.info("Starlette ASGI application created with native routes.")
|
945 |
|
946 |
+
# --- Development Server Execution Block (Optional) ---
|
947 |
+
# This block is generally NOT used when deploying with Gunicorn/Uvicorn
|
948 |
+
# Keep it for potential local testing without gunicorn
|
949 |
if __name__ == '__main__':
|
950 |
+
import uvicorn
|
951 |
+
logger.warning("Running in development mode using Uvicorn directly (not for production)")
|
952 |
+
local_port = int(os.environ.get('PORT', 8080)) # Use PORT if set, else 8080
|
953 |
+
uvicorn.run(app, host='0.0.0.0', port=local_port, log_level="info")
|
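Once the app is running (via the block above or an external ASGI server), the health route can be checked with a short httpx call; the port assumes the 8080 default used above.

import httpx
print(httpx.get("http://localhost:8080/").text)  # e.g. "Telegram Bot Summarizer - Status: ..."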