Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (Revised: Starlette Lifespan + ptb_app._running fix)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -25,8 +25,10 @@ from telegram.ext import (
|
|
25 |
)
|
26 |
from telegram.constants import ParseMode
|
27 |
from telegram.error import NetworkError, RetryAfter # Import RetryAfter
|
|
|
28 |
|
29 |
# --- Other Libraries ---
|
|
|
30 |
from youtube_transcript_api import YouTubeTranscriptApi
|
31 |
import requests
|
32 |
from bs4 import BeautifulSoup
|
@@ -41,14 +43,14 @@ logging.basicConfig(
|
|
41 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
42 |
level=logging.DEBUG
|
43 |
)
|
44 |
-
logging.getLogger("httpx").setLevel(logging.WARNING)
|
45 |
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
|
46 |
logging.getLogger("telegram.ext").setLevel(logging.INFO)
|
47 |
logging.getLogger('telegram.bot').setLevel(logging.INFO)
|
48 |
logging.getLogger("urllib3").setLevel(logging.INFO)
|
49 |
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
|
50 |
-
logging.getLogger('uvicorn').setLevel(logging.INFO)
|
51 |
-
logging.getLogger('starlette').setLevel(logging.INFO)
|
52 |
logger = logging.getLogger(__name__)
|
53 |
logger.info("Logging configured.")
|
54 |
|
@@ -71,6 +73,7 @@ SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
|
|
71 |
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
72 |
logger.info("Secret loading attempt finished.")
|
73 |
|
|
|
74 |
# --- Bot Logic Functions ---
|
75 |
|
76 |
# Helper Functions
|
@@ -160,7 +163,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str):
|
|
160 |
params = {"token": api_token}
|
161 |
payload = json.dumps({
|
162 |
"urls": [video_url],
|
163 |
-
"outputFormat": "singleStringText",
|
164 |
"maxRetries": 3,
|
165 |
"channelHandleBoolean": False,
|
166 |
"channelNameBoolean": False,
|
@@ -331,23 +334,53 @@ async def get_website_content_via_urltotext_api(url: str, api_key: str):
|
|
331 |
except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error: {e}"); return None
|
332 |
except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error: {e}", exc_info=True); return None
|
333 |
|
334 |
-
# DeepSeek Summary Function
|
335 |
async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
|
336 |
"""Generates summary using DeepSeek via OpenRouter API."""
|
337 |
logger.info(f"Generating '{summary_type}' summary. Input length: {len(text)}")
|
338 |
if not api_key: logger.error("OpenRouter API key missing."); return "Error: AI config key missing."
|
339 |
if not text: logger.warning("generate_summary called with empty text."); return "Error: No content to summarize."
|
340 |
openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"; model_name = "deepseek/deepseek-chat:free"
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input text ({len(text)}) > limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Truncated)"
|
346 |
full_prompt = f"{prompt}\n\n--- Start of Text ---\n\n{text}\n\n--- End of Text ---"
|
347 |
-
|
|
|
348 |
referer_url = f"https://{space_host}" if not space_host.startswith("http") else space_host
|
349 |
headers = {"Authorization": f"Bearer {api_key}","Content-Type": "application/json","HTTP-Referer": referer_url,"X-Title": "Telegram URL Summarizer Bot"}
|
350 |
payload = json.dumps({"model": model_name, "messages": [{"role": "user", "content": full_prompt}]})
|
|
|
351 |
try:
|
352 |
logger.debug(f"Sending request to OpenRouter (Model: {model_name})...")
|
353 |
response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=90)
|
@@ -476,25 +509,38 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
|
|
476 |
"""Log Errors caused by Updates."""
|
477 |
logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
|
478 |
|
479 |
-
# --- Bot Setup Function (Configure PTB Application) ---
|
480 |
async def setup_bot_config() -> Application:
|
481 |
-
"""Configures the PTB Application
|
482 |
logger.info("Configuring Telegram Application...")
|
483 |
if not TELEGRAM_TOKEN:
|
484 |
logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
|
485 |
raise ValueError("TELEGRAM_TOKEN environment variable not set.")
|
486 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
application.add_handler(CommandHandler("start", start))
|
488 |
application.add_handler(CommandHandler("help", help_command))
|
489 |
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
|
490 |
application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
|
491 |
application.add_error_handler(error_handler)
|
|
|
492 |
logger.info("Telegram handlers configured.")
|
493 |
return application
|
494 |
|
495 |
# --- ASGI Lifespan Context Manager ---
|
496 |
@contextlib.asynccontextmanager
|
497 |
-
async def lifespan(app: Starlette):
|
498 |
"""Handles PTB startup and shutdown during ASGI lifespan."""
|
499 |
global ptb_app
|
500 |
logger.info("ASGI Lifespan: Startup commencing...")
|
@@ -508,7 +554,6 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
|
|
508 |
await ptb_app.start()
|
509 |
logger.info(f"PTB App started. Bot details: {ptb_app.bot.username}")
|
510 |
|
511 |
-
# Set webhook after start
|
512 |
WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
|
513 |
if WEBHOOK_URL_BASE:
|
514 |
if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
|
@@ -516,9 +561,7 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
|
|
516 |
full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
|
517 |
logger.info(f"Attempting to set webhook to: {full_webhook_url}")
|
518 |
try:
|
519 |
-
|
520 |
-
# This might help avoid the initial rate limit error, although one worker succeeding is enough.
|
521 |
-
await asyncio.sleep(1.5) # Wait 1.5 seconds
|
522 |
await ptb_app.bot.set_webhook(url=full_webhook_url, allowed_updates=Update.ALL_TYPES)
|
523 |
webhook_info = await ptb_app.bot.get_webhook_info()
|
524 |
logger.info(f"Webhook set successfully! Info: {webhook_info}")
|
@@ -535,10 +578,8 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
|
|
535 |
logger.critical(f"CRITICAL ERROR during ASGI startup: {startup_err}", exc_info=True)
|
536 |
raise
|
537 |
finally:
|
538 |
-
# --- Shutdown ---
|
539 |
logger.info("ASGI Lifespan: Shutdown commencing...")
|
540 |
-
|
541 |
-
if ptb_app and ptb_app._running: # <--- Use _running here too
|
542 |
try:
|
543 |
logger.info("Stopping PTB App...")
|
544 |
await ptb_app.stop()
|
@@ -547,10 +588,8 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
|
|
547 |
logger.info("PTB App shut down successfully.")
|
548 |
except Exception as shutdown_err:
|
549 |
logger.error(f"Error during PTB shutdown: {shutdown_err}", exc_info=True)
|
550 |
-
elif ptb_app:
|
551 |
-
|
552 |
-
else:
|
553 |
-
logger.warning("No PTB App instance found at shutdown.")
|
554 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
555 |
|
556 |
|
@@ -564,27 +603,18 @@ def index():
|
|
564 |
"""Basic health check endpoint."""
|
565 |
logger.debug("Health check '/' accessed.")
|
566 |
bot_status = "UNKNOWN"
|
567 |
-
if ptb_app:
|
568 |
-
|
569 |
-
bot_status = "Running" if ptb_app._running else "Initialized/Stopped/Starting/Error"
|
570 |
-
# --- END CORRECTION ---
|
571 |
-
else:
|
572 |
-
bot_status = "Not Initialized"
|
573 |
return f"Telegram Bot Webhook Listener ({bot_status}) running via Starlette."
|
574 |
|
575 |
@flask_core_app.route('/webhook', methods=['POST'])
|
576 |
async def webhook() -> Response:
|
577 |
"""Webhook endpoint for Telegram updates."""
|
578 |
-
if not ptb_app:
|
579 |
logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None.")
|
580 |
return Response('Bot service not configured.', status=503)
|
581 |
-
|
582 |
-
|
583 |
-
# Use the internal _running attribute as suggested by the AttributeError
|
584 |
-
if not ptb_app._running:
|
585 |
-
# --- END CORRECTION ---
|
586 |
-
status = "Not Running" # If instance exists but not running
|
587 |
-
logger.error(f"Webhook triggered, but PTB Application is {status}.")
|
588 |
return Response('Bot service not ready.', status=503)
|
589 |
|
590 |
logger.debug("Webhook request received (POST)...")
|
@@ -593,7 +623,7 @@ async def webhook() -> Response:
|
|
593 |
update_data = request.get_json()
|
594 |
update = Update.de_json(update_data, ptb_app.bot)
|
595 |
logger.debug(f"Processing update ID: {update.update_id} via webhook")
|
596 |
-
await ptb_app.process_update(update)
|
597 |
logger.debug(f"Finished processing update ID: {update.update_id}")
|
598 |
return Response('ok', status=200)
|
599 |
except json.JSONDecodeError: logger.error("Failed JSON decode from Telegram."); return Response('Bad Request: Invalid JSON', status=400)
|
|
|
1 |
+
# main.py (Revised: Starlette Lifespan + ptb_app._running fix + HTTPX Config + Updated Prompts)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
25 |
)
|
26 |
from telegram.constants import ParseMode
|
27 |
from telegram.error import NetworkError, RetryAfter # Import RetryAfter
|
28 |
+
from telegram.request import HTTPXRequest # Import the request class
|
29 |
|
30 |
# --- Other Libraries ---
|
31 |
+
import httpx # Import httpx directly
|
32 |
from youtube_transcript_api import YouTubeTranscriptApi
|
33 |
import requests
|
34 |
from bs4 import BeautifulSoup
|
|
|
43 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
44 |
level=logging.DEBUG
|
45 |
)
|
46 |
+
logging.getLogger("httpx").setLevel(logging.WARNING) # Keep httpx logs quieter unless debugging it
|
47 |
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
|
48 |
logging.getLogger("telegram.ext").setLevel(logging.INFO)
|
49 |
logging.getLogger('telegram.bot').setLevel(logging.INFO)
|
50 |
logging.getLogger("urllib3").setLevel(logging.INFO)
|
51 |
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
|
52 |
+
logging.getLogger('uvicorn').setLevel(logging.INFO) # Uvicorn logs (incl. access)
|
53 |
+
logging.getLogger('starlette').setLevel(logging.INFO) # Starlette logs
|
54 |
logger = logging.getLogger(__name__)
|
55 |
logger.info("Logging configured.")
|
56 |
|
|
|
73 |
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
74 |
logger.info("Secret loading attempt finished.")
|
75 |
|
76 |
+
|
77 |
# --- Bot Logic Functions ---
|
78 |
|
79 |
# Helper Functions
|
|
|
163 |
params = {"token": api_token}
|
164 |
payload = json.dumps({
|
165 |
"urls": [video_url],
|
166 |
+
"outputFormat": "singleStringText",
|
167 |
"maxRetries": 3,
|
168 |
"channelHandleBoolean": False,
|
169 |
"channelNameBoolean": False,
|
|
|
334 |
except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error: {e}"); return None
|
335 |
except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error: {e}", exc_info=True); return None
|
336 |
|
337 |
+
# DeepSeek Summary Function (with updated prompts)
|
338 |
async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
|
339 |
"""Generates summary using DeepSeek via OpenRouter API."""
|
340 |
logger.info(f"Generating '{summary_type}' summary. Input length: {len(text)}")
|
341 |
if not api_key: logger.error("OpenRouter API key missing."); return "Error: AI config key missing."
|
342 |
if not text: logger.warning("generate_summary called with empty text."); return "Error: No content to summarize."
|
343 |
openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"; model_name = "deepseek/deepseek-chat:free"
|
344 |
+
|
345 |
+
# --- UPDATED PROMPTS ---
|
346 |
+
if summary_type == "paragraph":
|
347 |
+
prompt = (
|
348 |
+
"You are an AI model designed to provide concise summaries using British English spellings. "
|
349 |
+
"Your output should be:\n"
|
350 |
+
"• Clear and simple, use a level of language and vocabulary such that someone who isn’t familiar with the topic mentioned would still be able to understand, keeping British spellings throughout.\n"
|
351 |
+
"• Use straightforward and understandable language. Avoid overly complex or advanced vocabulary.\n"
|
352 |
+
"• Presented in one paragraph.\n"
|
353 |
+
"• A summary that is no more than 85 words; ensure it remains concise.\n"
|
354 |
+
"• Consider the entire text’s content, not just the beginning or a few topics: give equal attention to all parts.\n"
|
355 |
+
"• Do not use em dash (– or —) symbols; use semicolons instead.\n\n"
|
356 |
+
"Now, please summarize the following text according to these rules:"
|
357 |
+
)
|
358 |
+
elif summary_type == "points":
|
359 |
+
prompt = (
|
360 |
+
"You are an AI model designed to provide concise summaries using British English spellings.\n"
|
361 |
+
"Your output should be:\n"
|
362 |
+
"• Clear and simple, use a level of language and vocabulary such that someone who isn’t familiar with the topic mentioned would still be able to understand, keeping British spellings throughout.\n"
|
363 |
+
"• Use straightforward and understandable language. Avoid overly complex or advanced vocabulary.\n"
|
364 |
+
"• Presented as clear, distinct bullet points (using '*' or '-' at the start of each point).\n"
|
365 |
+
"• Each point should highlight a key piece of information, finding, or main topic discussed.\n"
|
366 |
+
"• Consider the entire text’s content, not just the beginning or a few topics: give equal attention to all parts.\n"
|
367 |
+
"• Do not use em dash (– or —) symbols; use semicolons instead.\n\n"
|
368 |
+
"Now, please summarize the following text into bullet points according to these rules:"
|
369 |
+
)
|
370 |
+
else:
|
371 |
+
logger.error(f"Invalid summary_type '{summary_type}'.");
|
372 |
+
return f"Error: Invalid summary type ('{summary_type}')."
|
373 |
+
# --- END UPDATED PROMPTS ---
|
374 |
+
|
375 |
+
MAX_INPUT_LENGTH = 500000 # Keep practical limit
|
376 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input text ({len(text)}) > limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Truncated)"
|
377 |
full_prompt = f"{prompt}\n\n--- Start of Text ---\n\n{text}\n\n--- End of Text ---"
|
378 |
+
|
379 |
+
space_host = os.environ.get("SPACE_HOST", "huggingface.co/spaces/YOUR_SPACE_NAME") # Replace with actual space if needed
|
380 |
referer_url = f"https://{space_host}" if not space_host.startswith("http") else space_host
|
381 |
headers = {"Authorization": f"Bearer {api_key}","Content-Type": "application/json","HTTP-Referer": referer_url,"X-Title": "Telegram URL Summarizer Bot"}
|
382 |
payload = json.dumps({"model": model_name, "messages": [{"role": "user", "content": full_prompt}]})
|
383 |
+
|
384 |
try:
|
385 |
logger.debug(f"Sending request to OpenRouter (Model: {model_name})...")
|
386 |
response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=90)
|
|
|
509 |
"""Log Errors caused by Updates."""
|
510 |
logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
|
511 |
|
512 |
+
# --- Bot Setup Function (Modified: Configure HTTPX Client) ---
|
513 |
async def setup_bot_config() -> Application:
    """Build the PTB Application with a tuned HTTPX client and register handlers.

    The function only constructs and configures the Application; it does not
    initialize or start it.

    Returns:
        The built Application with the command, message, callback-query and
        error handlers registered.

    Raises:
        ValueError: If TELEGRAM_TOKEN is empty or unset.
    """
    logger.info("Configuring Telegram Application...")
    if not TELEGRAM_TOKEN:
        logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
        raise ValueError("TELEGRAM_TOKEN environment variable not set.")

    # Extra keyword arguments forwarded to the underlying httpx.AsyncClient.
    httpx_settings = {
        "timeout": httpx.Timeout(timeout=10.0, read=10.0, connect=5.0, pool=10.0),
        "limits": httpx.Limits(
            max_keepalive_connections=10,
            max_connections=100,
            keepalive_expiry=15.0,  # Increased expiry
        ),
    }
    logger.info(f"Creating PTB HTTPXRequest with settings: {httpx_settings}")

    # HTTPXRequest has no `httpx_settings` parameter (passing it raises
    # TypeError); the client pass-through is `httpx_kwargs`.
    # NOTE(review): `httpx_kwargs` needs python-telegram-bot >= 21.6, and per
    # the PTB docs its values take precedence over the explicit pool/timeout
    # arguments below -- confirm the pinned PTB version.
    custom_request = HTTPXRequest(
        connection_pool_size=10,
        connect_timeout=5.0,
        read_timeout=10.0,
        http_version="1.1",
        httpx_kwargs=httpx_settings,
    )

    # The same request object is used for regular Bot API calls and for
    # get_updates, as in the original code.
    application = (
        Application.builder()
        .token(TELEGRAM_TOKEN)
        .request(custom_request)
        .get_updates_request(custom_request)
        .build()
    )

    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
    application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    application.add_error_handler(error_handler)

    logger.info("Telegram handlers configured.")
    return application
|
540 |
|
541 |
# --- ASGI Lifespan Context Manager ---
|
542 |
@contextlib.asynccontextmanager
|
543 |
+
async def lifespan(app: Starlette):
|
544 |
"""Handles PTB startup and shutdown during ASGI lifespan."""
|
545 |
global ptb_app
|
546 |
logger.info("ASGI Lifespan: Startup commencing...")
|
|
|
554 |
await ptb_app.start()
|
555 |
logger.info(f"PTB App started. Bot details: {ptb_app.bot.username}")
|
556 |
|
|
|
557 |
WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
|
558 |
if WEBHOOK_URL_BASE:
|
559 |
if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
|
|
|
561 |
full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
|
562 |
logger.info(f"Attempting to set webhook to: {full_webhook_url}")
|
563 |
try:
|
564 |
+
await asyncio.sleep(1.5) # Small delay before setting webhook
|
|
|
|
|
565 |
await ptb_app.bot.set_webhook(url=full_webhook_url, allowed_updates=Update.ALL_TYPES)
|
566 |
webhook_info = await ptb_app.bot.get_webhook_info()
|
567 |
logger.info(f"Webhook set successfully! Info: {webhook_info}")
|
|
|
578 |
logger.critical(f"CRITICAL ERROR during ASGI startup: {startup_err}", exc_info=True)
|
579 |
raise
|
580 |
finally:
|
|
|
581 |
logger.info("ASGI Lifespan: Shutdown commencing...")
|
582 |
+
if ptb_app and ptb_app._running: # Use _running
|
|
|
583 |
try:
|
584 |
logger.info("Stopping PTB App...")
|
585 |
await ptb_app.stop()
|
|
|
588 |
logger.info("PTB App shut down successfully.")
|
589 |
except Exception as shutdown_err:
|
590 |
logger.error(f"Error during PTB shutdown: {shutdown_err}", exc_info=True)
|
591 |
+
elif ptb_app: logger.warning("PTB App instance exists but was not running at shutdown.")
|
592 |
+
else: logger.warning("No PTB App instance found at shutdown.")
|
|
|
|
|
593 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
594 |
|
595 |
|
|
|
603 |
"""Basic health check endpoint."""
|
604 |
logger.debug("Health check '/' accessed.")
|
605 |
bot_status = "UNKNOWN"
|
606 |
+
if ptb_app: bot_status = "Running" if ptb_app._running else "Initialized/Stopped/Starting/Error" # Use _running
|
607 |
+
else: bot_status = "Not Initialized"
|
|
|
|
|
|
|
|
|
608 |
return f"Telegram Bot Webhook Listener ({bot_status}) running via Starlette."
|
609 |
|
610 |
@flask_core_app.route('/webhook', methods=['POST'])
|
611 |
async def webhook() -> Response:
|
612 |
"""Webhook endpoint for Telegram updates."""
|
613 |
+
if not ptb_app:
|
614 |
logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None.")
|
615 |
return Response('Bot service not configured.', status=503)
|
616 |
+
if not ptb_app._running: # Use _running
|
617 |
+
logger.error("Webhook triggered, but PTB Application is Not Running.")
|
|
|
|
|
|
|
|
|
|
|
618 |
return Response('Bot service not ready.', status=503)
|
619 |
|
620 |
logger.debug("Webhook request received (POST)...")
|
|
|
623 |
update_data = request.get_json()
|
624 |
update = Update.de_json(update_data, ptb_app.bot)
|
625 |
logger.debug(f"Processing update ID: {update.update_id} via webhook")
|
626 |
+
await ptb_app.process_update(update)
|
627 |
logger.debug(f"Finished processing update ID: {update.update_id}")
|
628 |
return Response('ok', status=200)
|
629 |
except json.JSONDecodeError: logger.error("Failed JSON decode from Telegram."); return Response('Bad Request: Invalid JSON', status=400)
|