fmab777 committed on
Commit
768889e
·
verified ·
1 Parent(s): afee9b9

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +71 -41
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Revised: Starlette Lifespan + ptb_app._running fix)
2
  import os
3
  import re
4
  import logging
@@ -25,8 +25,10 @@ from telegram.ext import (
25
  )
26
  from telegram.constants import ParseMode
27
  from telegram.error import NetworkError, RetryAfter # Import RetryAfter
 
28
 
29
  # --- Other Libraries ---
 
30
  from youtube_transcript_api import YouTubeTranscriptApi
31
  import requests
32
  from bs4 import BeautifulSoup
@@ -41,14 +43,14 @@ logging.basicConfig(
41
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
42
  level=logging.DEBUG
43
  )
44
- logging.getLogger("httpx").setLevel(logging.WARNING)
45
  if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
46
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
47
  logging.getLogger('telegram.bot').setLevel(logging.INFO)
48
  logging.getLogger("urllib3").setLevel(logging.INFO)
49
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
50
- logging.getLogger('uvicorn').setLevel(logging.INFO)
51
- logging.getLogger('starlette').setLevel(logging.INFO)
52
  logger = logging.getLogger(__name__)
53
  logger.info("Logging configured.")
54
 
@@ -71,6 +73,7 @@ SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
71
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
72
  logger.info("Secret loading attempt finished.")
73
 
 
74
  # --- Bot Logic Functions ---
75
 
76
  # Helper Functions
@@ -160,7 +163,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str):
160
  params = {"token": api_token}
161
  payload = json.dumps({
162
  "urls": [video_url],
163
- "outputFormat": "singleStringText", # Still request this primarily
164
  "maxRetries": 3,
165
  "channelHandleBoolean": False,
166
  "channelNameBoolean": False,
@@ -331,23 +334,53 @@ async def get_website_content_via_urltotext_api(url: str, api_key: str):
331
  except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error: {e}"); return None
332
  except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error: {e}", exc_info=True); return None
333
 
334
- # DeepSeek Summary Function
335
  async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
336
  """Generates summary using DeepSeek via OpenRouter API."""
337
  logger.info(f"Generating '{summary_type}' summary. Input length: {len(text)}")
338
  if not api_key: logger.error("OpenRouter API key missing."); return "Error: AI config key missing."
339
  if not text: logger.warning("generate_summary called with empty text."); return "Error: No content to summarize."
340
  openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"; model_name = "deepseek/deepseek-chat:free"
341
- if summary_type == "paragraph": prompt = "Please provide a concise, well-written paragraph summarizing the key information and main points of the following text. Focus on capturing the essence of the content accurately."
342
- elif summary_type == "points": prompt = "Please summarize the following text into clear, distinct bullet points. Each point should highlight a key piece of information, finding, or main topic discussed. Aim for clarity and conciseness."
343
- else: logger.error(f"Invalid summary_type '{summary_type}'."); return f"Error: Invalid summary type ('{summary_type}')."
344
- MAX_INPUT_LENGTH = 500000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input text ({len(text)}) > limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Truncated)"
346
  full_prompt = f"{prompt}\n\n--- Start of Text ---\n\n{text}\n\n--- End of Text ---"
347
- space_host = os.environ.get("SPACE_HOST", "huggingface.co/spaces/YOUR_SPACE_NAME")
 
348
  referer_url = f"https://{space_host}" if not space_host.startswith("http") else space_host
349
  headers = {"Authorization": f"Bearer {api_key}","Content-Type": "application/json","HTTP-Referer": referer_url,"X-Title": "Telegram URL Summarizer Bot"}
350
  payload = json.dumps({"model": model_name, "messages": [{"role": "user", "content": full_prompt}]})
 
351
  try:
352
  logger.debug(f"Sending request to OpenRouter (Model: {model_name})...")
353
  response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=90)
@@ -476,25 +509,38 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
476
  """Log Errors caused by Updates."""
477
  logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
478
 
479
- # --- Bot Setup Function (Configure Only) ---
480
  async def setup_bot_config() -> Application:
481
- """Configures the PTB Application but does NOT initialize or start it."""
482
  logger.info("Configuring Telegram Application...")
483
  if not TELEGRAM_TOKEN:
484
  logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
485
  raise ValueError("TELEGRAM_TOKEN environment variable not set.")
486
- application = Application.builder().token(TELEGRAM_TOKEN).build()
 
 
 
 
 
 
 
 
 
 
 
 
487
  application.add_handler(CommandHandler("start", start))
488
  application.add_handler(CommandHandler("help", help_command))
489
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
490
  application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
491
  application.add_error_handler(error_handler)
 
492
  logger.info("Telegram handlers configured.")
493
  return application
494
 
495
  # --- ASGI Lifespan Context Manager ---
496
  @contextlib.asynccontextmanager
497
- async def lifespan(app: Starlette): # app argument is the Starlette instance
498
  """Handles PTB startup and shutdown during ASGI lifespan."""
499
  global ptb_app
500
  logger.info("ASGI Lifespan: Startup commencing...")
@@ -508,7 +554,6 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
508
  await ptb_app.start()
509
  logger.info(f"PTB App started. Bot details: {ptb_app.bot.username}")
510
 
511
- # Set webhook after start
512
  WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
513
  if WEBHOOK_URL_BASE:
514
  if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
@@ -516,9 +561,7 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
516
  full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
517
  logger.info(f"Attempting to set webhook to: {full_webhook_url}")
518
  try:
519
- # Add a small delay before setting webhook, especially with multiple workers
520
- # This might help avoid the initial rate limit error, although one worker succeeding is enough.
521
- await asyncio.sleep(1.5) # Wait 1.5 seconds
522
  await ptb_app.bot.set_webhook(url=full_webhook_url, allowed_updates=Update.ALL_TYPES)
523
  webhook_info = await ptb_app.bot.get_webhook_info()
524
  logger.info(f"Webhook set successfully! Info: {webhook_info}")
@@ -535,10 +578,8 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
535
  logger.critical(f"CRITICAL ERROR during ASGI startup: {startup_err}", exc_info=True)
536
  raise
537
  finally:
538
- # --- Shutdown ---
539
  logger.info("ASGI Lifespan: Shutdown commencing...")
540
- # Use the correct attribute to check if running before stopping/shutting down
541
- if ptb_app and ptb_app._running: # <--- Use _running here too
542
  try:
543
  logger.info("Stopping PTB App...")
544
  await ptb_app.stop()
@@ -547,10 +588,8 @@ async def lifespan(app: Starlette): # app argument is the Starlette instance
547
  logger.info("PTB App shut down successfully.")
548
  except Exception as shutdown_err:
549
  logger.error(f"Error during PTB shutdown: {shutdown_err}", exc_info=True)
550
- elif ptb_app:
551
- logger.warning("PTB App instance exists but was not running at shutdown.")
552
- else:
553
- logger.warning("No PTB App instance found at shutdown.")
554
  logger.info("ASGI Lifespan: Shutdown complete.")
555
 
556
 
@@ -564,27 +603,18 @@ def index():
564
  """Basic health check endpoint."""
565
  logger.debug("Health check '/' accessed.")
566
  bot_status = "UNKNOWN"
567
- if ptb_app:
568
- # --- CORRECTED CHECK ---
569
- bot_status = "Running" if ptb_app._running else "Initialized/Stopped/Starting/Error"
570
- # --- END CORRECTION ---
571
- else:
572
- bot_status = "Not Initialized"
573
  return f"Telegram Bot Webhook Listener ({bot_status}) running via Starlette."
574
 
575
  @flask_core_app.route('/webhook', methods=['POST'])
576
  async def webhook() -> Response:
577
  """Webhook endpoint for Telegram updates."""
578
- if not ptb_app: # Check if instance exists first
579
  logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None.")
580
  return Response('Bot service not configured.', status=503)
581
-
582
- # --- CORRECTED CHECK ---
583
- # Use the internal _running attribute as suggested by the AttributeError
584
- if not ptb_app._running:
585
- # --- END CORRECTION ---
586
- status = "Not Running" # If instance exists but not running
587
- logger.error(f"Webhook triggered, but PTB Application is {status}.")
588
  return Response('Bot service not ready.', status=503)
589
 
590
  logger.debug("Webhook request received (POST)...")
@@ -593,7 +623,7 @@ async def webhook() -> Response:
593
  update_data = request.get_json()
594
  update = Update.de_json(update_data, ptb_app.bot)
595
  logger.debug(f"Processing update ID: {update.update_id} via webhook")
596
- await ptb_app.process_update(update) # Queue/process the update
597
  logger.debug(f"Finished processing update ID: {update.update_id}")
598
  return Response('ok', status=200)
599
  except json.JSONDecodeError: logger.error("Failed JSON decode from Telegram."); return Response('Bad Request: Invalid JSON', status=400)
 
1
+ # main.py (Revised: Starlette Lifespan + ptb_app._running fix + HTTPX Config + Updated Prompts)
2
  import os
3
  import re
4
  import logging
 
25
  )
26
  from telegram.constants import ParseMode
27
  from telegram.error import NetworkError, RetryAfter # Import RetryAfter
28
+ from telegram.request import HTTPXRequest # Import the request class
29
 
30
  # --- Other Libraries ---
31
+ import httpx # Import httpx directly
32
  from youtube_transcript_api import YouTubeTranscriptApi
33
  import requests
34
  from bs4 import BeautifulSoup
 
43
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
44
  level=logging.DEBUG
45
  )
46
+ logging.getLogger("httpx").setLevel(logging.WARNING) # Keep httpx logs quieter unless debugging it
47
  if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
48
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
49
  logging.getLogger('telegram.bot').setLevel(logging.INFO)
50
  logging.getLogger("urllib3").setLevel(logging.INFO)
51
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
52
+ logging.getLogger('uvicorn').setLevel(logging.INFO) # Uvicorn logs (incl. access)
53
+ logging.getLogger('starlette').setLevel(logging.INFO) # Starlette logs
54
  logger = logging.getLogger(__name__)
55
  logger.info("Logging configured.")
56
 
 
73
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
74
  logger.info("Secret loading attempt finished.")
75
 
76
+
77
  # --- Bot Logic Functions ---
78
 
79
  # Helper Functions
 
163
  params = {"token": api_token}
164
  payload = json.dumps({
165
  "urls": [video_url],
166
+ "outputFormat": "singleStringText",
167
  "maxRetries": 3,
168
  "channelHandleBoolean": False,
169
  "channelNameBoolean": False,
 
334
  except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error: {e}"); return None
335
  except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error: {e}", exc_info=True); return None
336
 
337
+ # DeepSeek Summary Function (with updated prompts)
338
  async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
339
  """Generates summary using DeepSeek via OpenRouter API."""
340
  logger.info(f"Generating '{summary_type}' summary. Input length: {len(text)}")
341
  if not api_key: logger.error("OpenRouter API key missing."); return "Error: AI config key missing."
342
  if not text: logger.warning("generate_summary called with empty text."); return "Error: No content to summarize."
343
  openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"; model_name = "deepseek/deepseek-chat:free"
344
+
345
+ # --- UPDATED PROMPTS ---
346
+ if summary_type == "paragraph":
347
+ prompt = (
348
+ "You are an AI model designed to provide concise summaries using British English spellings. "
349
+ "Your output should be:\n"
350
+ "• Clear and simple, use a level of language and vocabulary such that someone who isn’t familiar with the topic mentioned would still be able to understand, keeping British spellings throughout.\n"
351
+ "• Use straightforward and understandable language. Avoid overly complex or advanced vocabulary.\n"
352
+ "• Presented in one paragraph.\n"
353
+ "• A summary that is no more than 85 words; ensure it remains concise.\n"
354
+ "• Consider the entire text’s content, not just the beginning or a few topics: give equal attention to all parts.\n"
355
+ "• Do not use em dash (– or —) symbols; use semicolons instead.\n\n"
356
+ "Now, please summarize the following text according to these rules:"
357
+ )
358
+ elif summary_type == "points":
359
+ prompt = (
360
+ "You are an AI model designed to provide concise summaries using British English spellings.\n"
361
+ "Your output should be:\n"
362
+ "• Clear and simple, use a level of language and vocabulary such that someone who isn’t familiar with the topic mentioned would still be able to understand, keeping British spellings throughout.\n"
363
+ "• Use straightforward and understandable language. Avoid overly complex or advanced vocabulary.\n"
364
+ "• Presented as clear, distinct bullet points (using '*' or '-' at the start of each point).\n"
365
+ "• Each point should highlight a key piece of information, finding, or main topic discussed.\n"
366
+ "• Consider the entire text’s content, not just the beginning or a few topics: give equal attention to all parts.\n"
367
+ "• Do not use em dash (– or —) symbols; use semicolons instead.\n\n"
368
+ "Now, please summarize the following text into bullet points according to these rules:"
369
+ )
370
+ else:
371
+ logger.error(f"Invalid summary_type '{summary_type}'.");
372
+ return f"Error: Invalid summary type ('{summary_type}')."
373
+ # --- END UPDATED PROMPTS ---
374
+
375
+ MAX_INPUT_LENGTH = 500000 # Keep practical limit
376
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input text ({len(text)}) > limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Truncated)"
377
  full_prompt = f"{prompt}\n\n--- Start of Text ---\n\n{text}\n\n--- End of Text ---"
378
+
379
+ space_host = os.environ.get("SPACE_HOST", "huggingface.co/spaces/YOUR_SPACE_NAME") # Replace with actual space if needed
380
  referer_url = f"https://{space_host}" if not space_host.startswith("http") else space_host
381
  headers = {"Authorization": f"Bearer {api_key}","Content-Type": "application/json","HTTP-Referer": referer_url,"X-Title": "Telegram URL Summarizer Bot"}
382
  payload = json.dumps({"model": model_name, "messages": [{"role": "user", "content": full_prompt}]})
383
+
384
  try:
385
  logger.debug(f"Sending request to OpenRouter (Model: {model_name})...")
386
  response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=90)
 
509
  """Log Errors caused by Updates."""
510
  logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
511
 
512
+ # --- Bot Setup Function (Modified: Configure HTTPX Client) ---
513
  async def setup_bot_config() -> Application:
514
+ """Configures the PTB Application with custom HTTPX settings."""
515
  logger.info("Configuring Telegram Application...")
516
  if not TELEGRAM_TOKEN:
517
  logger.critical("CRITICAL: TELEGRAM_TOKEN environment variable not found.")
518
  raise ValueError("TELEGRAM_TOKEN environment variable not set.")
519
+
520
+ httpx_settings = {
521
+ "timeout": httpx.Timeout(timeout=10.0, read=10.0, connect=5.0, pool=10.0),
522
+ "limits": httpx.Limits(max_keepalive_connections=10, max_connections=100, keepalive_expiry=15.0) # Increased expiry
523
+ }
524
+ logger.info(f"Creating PTB HTTPXRequest with settings: {httpx_settings}")
525
+ custom_request = HTTPXRequest(connection_pool_size=10, connect_timeout=5.0, read_timeout=10.0, http_version="1.1", httpx_settings=httpx_settings)
526
+
527
+ application_builder = Application.builder().token(TELEGRAM_TOKEN)
528
+ application_builder.request(custom_request)
529
+ application_builder.get_updates_request(custom_request)
530
+ application = application_builder.build()
531
+
532
  application.add_handler(CommandHandler("start", start))
533
  application.add_handler(CommandHandler("help", help_command))
534
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
535
  application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
536
  application.add_error_handler(error_handler)
537
+
538
  logger.info("Telegram handlers configured.")
539
  return application
540
 
541
  # --- ASGI Lifespan Context Manager ---
542
  @contextlib.asynccontextmanager
543
+ async def lifespan(app: Starlette):
544
  """Handles PTB startup and shutdown during ASGI lifespan."""
545
  global ptb_app
546
  logger.info("ASGI Lifespan: Startup commencing...")
 
554
  await ptb_app.start()
555
  logger.info(f"PTB App started. Bot details: {ptb_app.bot.username}")
556
 
 
557
  WEBHOOK_URL_BASE = os.environ.get("SPACE_HOST")
558
  if WEBHOOK_URL_BASE:
559
  if not WEBHOOK_URL_BASE.startswith("https://"): WEBHOOK_URL_BASE = f"https://{WEBHOOK_URL_BASE}"
 
561
  full_webhook_url = f"{WEBHOOK_URL_BASE.rstrip('/')}{webhook_path}"
562
  logger.info(f"Attempting to set webhook to: {full_webhook_url}")
563
  try:
564
+ await asyncio.sleep(1.5) # Small delay before setting webhook
 
 
565
  await ptb_app.bot.set_webhook(url=full_webhook_url, allowed_updates=Update.ALL_TYPES)
566
  webhook_info = await ptb_app.bot.get_webhook_info()
567
  logger.info(f"Webhook set successfully! Info: {webhook_info}")
 
578
  logger.critical(f"CRITICAL ERROR during ASGI startup: {startup_err}", exc_info=True)
579
  raise
580
  finally:
 
581
  logger.info("ASGI Lifespan: Shutdown commencing...")
582
+ if ptb_app and ptb_app._running: # Use _running
 
583
  try:
584
  logger.info("Stopping PTB App...")
585
  await ptb_app.stop()
 
588
  logger.info("PTB App shut down successfully.")
589
  except Exception as shutdown_err:
590
  logger.error(f"Error during PTB shutdown: {shutdown_err}", exc_info=True)
591
+ elif ptb_app: logger.warning("PTB App instance exists but was not running at shutdown.")
592
+ else: logger.warning("No PTB App instance found at shutdown.")
 
 
593
  logger.info("ASGI Lifespan: Shutdown complete.")
594
 
595
 
 
603
  """Basic health check endpoint."""
604
  logger.debug("Health check '/' accessed.")
605
  bot_status = "UNKNOWN"
606
+ if ptb_app: bot_status = "Running" if ptb_app._running else "Initialized/Stopped/Starting/Error" # Use _running
607
+ else: bot_status = "Not Initialized"
 
 
 
 
608
  return f"Telegram Bot Webhook Listener ({bot_status}) running via Starlette."
609
 
610
  @flask_core_app.route('/webhook', methods=['POST'])
611
  async def webhook() -> Response:
612
  """Webhook endpoint for Telegram updates."""
613
+ if not ptb_app:
614
  logger.error("Webhook triggered, but PTB Application instance (ptb_app) is None.")
615
  return Response('Bot service not configured.', status=503)
616
+ if not ptb_app._running: # Use _running
617
+ logger.error("Webhook triggered, but PTB Application is Not Running.")
 
 
 
 
 
618
  return Response('Bot service not ready.', status=503)
619
 
620
  logger.debug("Webhook request received (POST)...")
 
623
  update_data = request.get_json()
624
  update = Update.de_json(update_data, ptb_app.bot)
625
  logger.debug(f"Processing update ID: {update.update_id} via webhook")
626
+ await ptb_app.process_update(update)
627
  logger.debug(f"Finished processing update ID: {update.update_id}")
628
  return Response('ok', status=200)
629
  except json.JSONDecodeError: logger.error("Failed JSON decode from Telegram."); return Response('Bad Request: Invalid JSON', status=400)