fmab777 committed on
Commit
d19297a
·
verified ·
1 Parent(s): 5f4b1c4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +267 -219
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Revised: Removed query.answer() call to avoid connection issue)
2
  import os
3
  import re
4
  import logging
@@ -6,6 +6,7 @@ import asyncio
6
  import json
7
  import html # For unescaping HTML entities
8
  import contextlib # For async context manager (lifespan)
 
9
 
10
  # --- Frameworks ---
11
  from flask import Flask, request, Response # Core web routes
@@ -14,7 +15,7 @@ from starlette.routing import Mount # Mount Flask within Starlette
14
  from starlette.middleware.wsgi import WSGIMiddleware # Wrap Flask for Starlette
15
 
16
  # --- Telegram Bot ---
17
- from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
18
  from telegram.ext import (
19
  Application,
20
  CommandHandler,
@@ -24,11 +25,11 @@ from telegram.ext import (
24
  CallbackQueryHandler,
25
  )
26
  from telegram.constants import ParseMode
27
- from telegram.error import NetworkError, RetryAfter, TimedOut # Import TimedOut
28
- from telegram.request import HTTPXRequest # Import the request class
29
 
30
  # --- Other Libraries ---
31
- import httpx # Keep import, might be useful elsewhere or if upgrading PTB later
32
  from youtube_transcript_api import YouTubeTranscriptApi
33
  import requests
34
  from bs4 import BeautifulSoup
@@ -43,7 +44,6 @@ logging.basicConfig(
43
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
44
  level=logging.DEBUG
45
  )
46
- # Reduce log spam from libraries
47
  logging.getLogger("httpx").setLevel(logging.WARNING)
48
  if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
49
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
@@ -52,7 +52,6 @@ logging.getLogger("urllib3").setLevel(logging.INFO)
52
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
53
  logging.getLogger('uvicorn').setLevel(logging.INFO)
54
  logging.getLogger('starlette').setLevel(logging.INFO)
55
- # Keep our app logger at DEBUG
56
  logger = logging.getLogger(__name__)
57
  logger.info("Logging configured.")
58
 
@@ -79,7 +78,7 @@ logger.info("Secret loading attempt finished.")
79
  # (Keep ALL your functions: is_youtube_url, extract_youtube_id,
80
  # get_transcript_via_supadata, get_transcript_via_apify,
81
  # get_youtube_transcript, get_website_content_via_requests,
82
- # get_website_content_via_urltotext_api, generate_summary - unchanged from previous version)
83
 
84
  # Helper Functions
85
  def is_youtube_url(url):
@@ -501,302 +500,349 @@ async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
501
  except Exception as e: logger.error(f"Unexpected error occurred within generate_summary function: {e}", exc_info=True); return "Sorry, an unexpected internal error occurred while generating the summary."
502
 
503
 
504
- # --- Telegram Bot Handlers ---
505
-
506
- async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
507
- """Handles the /start command."""
508
- user = update.effective_user
509
- if not user: return
510
- logger.info(f"User {user.id} ({user.username or 'NoUsername'}) initiated /start.")
511
- mention = user.mention_html() if user.username else user.first_name
512
- start_message = (
513
- f"👋 Hello {mention}!\n\n"
514
- "I can summarise YouTube videos or web articles for you.\n\n"
515
- "Just send me a link (URL) and I'll ask you whether you want the summary as a paragraph or bullet points.\n\n"
516
- "Type /help for more details."
517
- )
518
- await update.message.reply_html(start_message)
519
-
520
- async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
521
- """Handles the /help command."""
522
- user = update.effective_user
523
- logger.info(f"User {user.id if user else '?'} requested /help.")
524
- help_text = (
525
- "**How to Use Me:**\n"
526
- "1. Send me a direct link (URL) to a YouTube video or a web article.\n"
527
- "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n"
528
- "3. Click the button for your preferred format.\n"
529
- "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
530
- "**Important Notes:**\n"
531
- "- **YouTube:** Getting transcripts can sometimes fail if they are disabled, unavailable for the video's language, or if YouTube temporarily blocks requests.\n"
532
- "- **Websites:** I do my best to extract the main article content, but complex websites (especially those heavily reliant on JavaScript or with strong anti-scraping measures) might not work perfectly. I have a fallback service to help with tricky sites.\n"
533
- "- **AI Summaries:** The AI tries its best to be accurate and follow the requested format, but errors or unexpected outputs are possible.\n"
534
- "- **Length:** Very long articles or videos might be truncated before summarization to fit within processing limits.\n\n"
535
- "Just send a link to get started!"
536
- )
537
- await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
538
-
539
- async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
540
- """Handles messages containing potential URLs."""
541
- if not update.message or not update.message.text: return
542
- message_text = update.message.text.strip()
543
- user = update.effective_user
544
- if not user: return
545
-
546
- url_pattern = r'https?://(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}(?:/[^\s]*)?'
547
- match = re.search(url_pattern, message_text)
548
-
549
- if match:
550
- url = match.group(0)
551
- logger.info(f"User {user.id} sent potential URL: {url}")
552
- context.user_data['url_to_summarize'] = url
553
- logger.debug(f"Stored URL '{url}' in user_data for user {user.id}")
554
-
555
- keyboard = [
556
- [
557
- InlineKeyboardButton("📜 Paragraph Summary", callback_data="paragraph"),
558
- InlineKeyboardButton("🔹 Bullet Points", callback_data="points")
559
- ]
560
- ]
561
- reply_markup = InlineKeyboardMarkup(keyboard)
562
- await update.message.reply_text(
563
- f"✅ Link received:\n`{url}`\n\nChoose your desired summary format:",
564
- reply_markup=reply_markup,
565
- parse_mode=ParseMode.MARKDOWN,
566
- link_preview_options={'is_disabled': True}
567
- )
568
- else:
569
- if not message_text.startswith('/'):
570
- await update.message.reply_text("Please send me a valid URL (starting with http:// or https://) to summarize.")
571
 
 
 
 
 
 
 
 
 
 
 
 
572
 
573
- async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
574
- """Handles button presses for choosing summary type."""
575
- query = update.callback_query
576
- if not query or not query.from_user:
577
- logger.warning("Callback query or user missing in update.")
578
- return
579
- user = query.from_user
580
-
581
- # --- REMOVED/COMMENTED OUT query.answer() ---
582
- # logger.debug(f"Attempting to answer callback query {query.id} for user {user.id}")
583
- # try:
584
- # await query.answer()
585
- # logger.debug(f"Successfully answered callback query {query.id} for user {user.id}")
586
- # except TimedOut:
587
- # logger.warning(f"Timeout answering callback query {query.id} for user {user.id}. Processing continues.")
588
- # except Exception as e:
589
- # logger.error(f"Error answering callback query {query.id} for user {user.id}: {e!r}", exc_info=True)
590
- # --- END REMOVAL ---
591
-
592
- summary_type = query.data
593
- url = context.user_data.get('url_to_summarize')
594
- # Add log here since answer is removed
595
- logger.info(f"User {user.id} chose summary type '{summary_type}'. Checking for stored URL (query_id: {query.id}).")
596
-
597
-
598
- if not url:
599
- logger.warning(f"User {user.id} pressed button '{summary_type}', but NO URL found in user_data context.")
600
- try:
601
- # Try to edit the original message even if we didn't answer the query
602
- await query.edit_message_text(text="⚠️ Oops! I lost the context for that link. Please send the link again.")
603
- except TimedOut:
604
- logger.error(f"Timeout trying to edit message to inform user {user.id} about lost context.")
605
- except Exception as edit_err:
606
- logger.error(f"Failed to edit message for lost context for user {user.id}: {edit_err}")
607
- return
608
-
609
- logger.info(f"Processing URL '{url}' for user {user.id} with type '{summary_type}'.")
610
- context.user_data.pop('url_to_summarize', None)
611
- logger.debug(f"Cleared URL from user_data for user {user.id}")
612
-
613
  current_openrouter_key = os.environ.get('OPENROUTER_API_KEY')
614
  current_urltotext_key = os.environ.get('URLTOTEXT_API_KEY')
615
  current_supadata_key = os.environ.get('SUPADATA_API_KEY')
616
  current_apify_token = os.environ.get('APIFY_API_TOKEN')
 
617
  keys_present = f"OR={'Y' if current_openrouter_key else 'N'}, UTT={'Y' if current_urltotext_key else 'N'}, SD={'Y' if current_supadata_key else 'N'}, AP={'Y' if current_apify_token else 'N'}"
618
- logger.debug(f"API Key check for user {user.id} request: {keys_present}")
619
 
620
  if not current_openrouter_key:
621
- logger.error(f"CRITICAL: OpenRouter API key is missing. Cannot generate summary for user {user.id}.")
622
  try:
623
- await query.edit_message_text(text="❌ Configuration Error: The AI summarization service is not configured correctly. Please contact the administrator.")
624
- except TimedOut:
625
- logger.error(f"Timeout editing message to inform user {user.id} about missing AI key.")
 
 
 
626
  except Exception as edit_err:
627
- logger.error(f"Failed to edit message for missing AI key for user {user.id}: {edit_err}")
628
- return
629
 
 
630
  processing_message_text = f"⏳ Working on your '{summary_type}' summary for the link...\n_(This might take up to a minute depending on the content)_"
631
- message_to_edit = query.message
632
- status_message_sent = None
633
 
634
- # Now, attempt to edit the message (this is the call that previously timed out)
635
  try:
636
- if message_to_edit:
637
- await query.edit_message_text(text=processing_message_text)
638
- logger.debug(f"Successfully edited original message {message_to_edit.message_id} to show 'Working...' status for query {query.id} (after skipping answer).")
639
- else:
640
- logger.warning(f"Original message (query.message) not found for query {query.id}. Cannot edit, will send new status message.")
641
- raise ValueError("Original message object missing") # Force fallback
642
- except TimedOut as e:
643
- # If this STILL times out even without query.answer(), the pool issue might be deeper
644
- logger.error(f"POOL TIMEOUT occurred on edit_message_text even after skipping query.answer(). Query {query.id}. Error: {e!r}. Pool might still be blocked.")
645
- message_to_edit = None
646
- # Try sending a new message, but expect it might also fail
647
  try:
648
- status_message_sent = await context.bot.send_message(chat_id=user.id, text=f"{processing_message_text}\n_(Experiencing delays)_")
649
- logger.debug(f"Sent new status message {status_message_sent.message_id} to user {user.id} after edit timeout.")
650
- except TimedOut:
651
- logger.error(f"Timeout sending NEW 'Working...' status message to user {user.id} after edit also timed out.")
652
  except Exception as send_err:
653
- logger.error(f"Failed sending NEW 'Working...' status message to user {user.id} after edit also timed out: {send_err}")
 
654
  except Exception as e:
655
- # Handle other errors during edit_message_text
656
- logger.warning(f"Could not edit original message {message_to_edit.message_id if message_to_edit else 'N/A'} for query {query.id}: {e!r}. Attempting to send a new status message.")
657
- message_to_edit = None
658
- try:
659
- status_message_sent = await context.bot.send_message(chat_id=user.id, text=processing_message_text)
660
- logger.debug(f"Sent new status message {status_message_sent.message_id} to user {user.id}.")
661
- except TimedOut:
662
- logger.error(f"Timeout sending NEW 'Working...' status message to user {user.id} after edit failed.")
663
- except Exception as send_err:
664
- logger.error(f"Failed sending NEW 'Working...' status message to user {user.id} after edit failed: {send_err}")
665
 
 
666
  content = None
667
  user_feedback_message = None
668
  success = False
669
 
670
- # --- Main processing logic (unchanged) ---
671
  try:
 
672
  try:
673
- logger.debug(f"Sending 'typing' chat action to chat {user.id}")
674
- await context.bot.send_chat_action(chat_id=user.id, action='typing')
675
- except TimedOut: logger.warning(f"Timeout sending 'typing' action for user {user.id}.")
676
- except Exception as ca_err: logger.warning(f"Failed sending 'typing' action for user {user.id}: {ca_err}")
677
 
 
678
  is_yt = is_youtube_url(url)
679
- logger.debug(f"URL ({url}) is YouTube: {is_yt} (User: {user.id})")
680
 
681
  if is_yt:
682
  video_id = extract_youtube_id(url)
683
  if video_id:
684
- logger.info(f"Fetching YouTube transcript for video ID: {video_id} (User: {user.id})")
685
  content = await get_youtube_transcript(video_id, url, current_supadata_key, current_apify_token)
686
  if not content:
687
- logger.warning(f"Failed to get YouTube transcript for {video_id} (User: {user.id}).")
688
  user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be unavailable, private, have captions disabled, or an error occurred."
689
  else:
690
- logger.info(f"Successfully fetched YouTube transcript for {video_id}. Length: {len(content)} (User: {user.id})")
691
  else:
692
- logger.warning(f"Failed to extract YouTube video ID from URL: {url} (User: {user.id})")
693
  user_feedback_message = "⚠️ Sorry, I couldn't identify a valid YouTube video ID in the link you provided."
694
  else:
695
- logger.info(f"Attempting website scrape (Requests/BS4) for URL: {url} (User: {user.id})")
 
696
  content = await get_website_content_via_requests(url)
697
  if content:
698
- logger.info(f"Website scrape successful (Requests/BS4). Length: {len(content)} (User: {user.id})")
699
  else:
700
- logger.warning(f"Primary website scrape failed for {url} (User: {user.id}). Trying fallback API.")
701
  if current_urltotext_key:
702
- try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before fallback scrape.")
703
  except: pass
704
 
705
- logger.info(f"Attempting website scrape via URLToText API for: {url} (User: {user.id})")
706
  content = await get_website_content_via_urltotext_api(url, current_urltotext_key)
707
  if content:
708
- logger.info(f"Website scrape successful via URLToText API. Length: {len(content)} (User: {user.id})")
709
  else:
710
- logger.warning(f"Fallback website scrape (URLToText API) also failed for {url} (User: {user.id}).")
711
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website using available methods. It might be protected or structured in a way I can't parse."
712
  else:
713
- logger.warning(f"Primary scrape failed and URLToText API key not configured. Cannot fallback for {url} (User: {user.id}).")
714
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website, and the fallback service isn't configured."
715
 
 
716
  if content:
717
- logger.info(f"Content fetched (Length: {len(content)}). Generating '{summary_type}' summary for user {user.id}.")
718
- try: await context.bot.send_chat_action(chat_id=user.id, action='typing'); logger.debug("Sent typing before AI summary generation.")
719
  except: pass
720
 
721
  summary = await generate_summary(content, summary_type, current_openrouter_key)
722
 
723
  if summary.startswith("Error:") or summary.startswith("Sorry,"):
724
- logger.warning(f"AI summary generation failed for user {user.id}. Reason: {summary}")
725
  user_feedback_message = f"⚠️ {summary}"
726
  else:
727
- logger.info(f"Summary generated successfully for user {user.id}. Length: {len(summary)}. Sending result.")
 
728
  try:
729
- await context.bot.send_message(
730
- chat_id=user.id,
731
  text=summary,
732
  parse_mode=ParseMode.MARKDOWN,
733
  link_preview_options={'is_disabled': True}
734
  )
735
  success = True
736
  user_feedback_message = None
737
- logger.info(f"Successfully sent summary to user {user.id}.")
738
- except TimedOut:
739
- logger.error(f"Timeout sending final summary message to user {user.id}.")
740
- user_feedback_message = "⚠️ Sorry, there was a timeout while trying to send you the final summary."
741
- success = False
742
  except Exception as send_final_err:
743
- logger.error(f"Failed sending final summary to user {user.id}: {send_final_err}", exc_info=True)
744
  user_feedback_message = "⚠️ Sorry, an unexpected error occurred while sending the final summary."
745
  success = False
746
 
747
  elif not user_feedback_message:
748
- logger.warning(f"Content retrieval resulted in None, but no specific user feedback message was set. URL: {url} (User: {user.id})")
749
  user_feedback_message = "⚠️ Sorry, I couldn't retrieve any usable content from the link provided."
750
 
 
751
  if user_feedback_message and not success:
752
- logger.warning(f"Processing failed or summary sending failed for user {user.id}. Sending feedback: {user_feedback_message}")
753
  try:
754
- await context.bot.send_message(chat_id=user.id, text=user_feedback_message)
755
- except TimedOut:
756
- logger.error(f"Timeout sending final FAILURE feedback message to user {user.id}.")
757
  except Exception as send_feedback_err:
758
- logger.error(f"Failed sending final FAILURE feedback message to user {user.id}: {send_feedback_err}")
759
 
760
  except Exception as e:
761
- logger.error(f"Unexpected critical error during callback processing for user {user.id}, URL {url}: {e}", exc_info=True)
 
762
  try:
763
- await context.bot.send_message(chat_id=user.id, text="❌ Oops! An unexpected internal error occurred while processing your request. The issue has been logged.")
764
- except TimedOut:
765
- logger.error(f"Timeout sending CRITICAL internal error feedback message to user {user.id}.")
766
  except Exception as final_err:
767
- logger.error(f"Failed sending CRITICAL internal error feedback message to user {user.id}: {final_err}")
768
  success = False
769
 
770
  finally:
771
- logger.debug(f"Cleaning up status message(s) for user {user.id}, query {query.id}. Success={success}")
 
772
  try:
773
- if status_message_sent:
774
- await context.bot.delete_message(chat_id=user.id, message_id=status_message_sent.message_id)
775
- logger.debug(f"Deleted separate status message {status_message_sent.message_id} for user {user.id}.")
776
- elif message_to_edit:
 
 
777
  if success:
778
- await query.delete_message()
779
- logger.debug(f"Processing succeeded. Deleted original (edited) message {message_to_edit.message_id} for query {query.id}.")
 
780
  else:
781
- logger.debug(f"Processing failed. Leaving edited message {message_to_edit.message_id} in place for query {query.id}.")
 
 
 
 
 
 
 
782
 
783
- except TimedOut:
784
- logger.warning(f"Timeout attempting to delete status/button message for user {user.id}, query {query.id}.")
785
  except Exception as del_e:
786
- logger.warning(f"Could not delete status/button message for user {user.id}, query {query.id}: {del_e!r}")
787
 
788
- logger.info(f"Finished handling callback query {query.id} for user {user.id}. Overall Success: {success}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
789
 
790
 
791
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
792
- """Log Errors caused by Updates."""
793
- logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
794
- if isinstance(context.error, TimedOut):
795
- logger.warning("A timeout error occurred in PTB communication.")
796
- elif isinstance(context.error, NetworkError):
797
- logger.warning(f"A network error occurred: {context.error}")
798
-
799
- # --- Bot Setup Function (Corrected: Removed invalid 'limits' param for PTB v20) ---
 
 
 
 
 
 
 
800
  async def setup_bot_config() -> Application:
801
  """Configures the PTB Application with custom HTTPX settings for PTB v20.x."""
802
  logger.info("Configuring Telegram Application...")
@@ -824,13 +870,12 @@ async def setup_bot_config() -> Application:
824
 
825
  application_builder = Application.builder().token(TELEGRAM_TOKEN)
826
  application_builder.request(custom_request)
827
-
828
  application = application_builder.build()
829
 
830
  application.add_handler(CommandHandler("start", start))
831
  application.add_handler(CommandHandler("help", help_command))
832
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
833
- application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
834
  application.add_error_handler(error_handler)
835
 
836
  logger.info("Telegram application handlers configured.")
@@ -850,8 +895,6 @@ async def lifespan(app: Starlette):
850
  logger.info("PTB Application initialized. Starting background tasks (e.g., job queue)...")
851
  await ptb_app.start() # Starts dispatcher, job queue, etc. but NOT polling
852
 
853
- # Removed updater.stop() call here as it caused RuntimeError
854
-
855
  bot_instance = ptb_app.bot
856
  bot_info = await bot_instance.get_me()
857
  logger.info(f"PTB Application started successfully. Bot ID: {bot_info.id}, Username: @{bot_info.username}")
@@ -868,6 +911,7 @@ async def lifespan(app: Starlette):
868
  await bot_instance.set_webhook(
869
  url=full_webhook_url,
870
  allowed_updates=Update.ALL_TYPES,
 
871
  )
872
  webhook_info = await bot_instance.get_webhook_info()
873
  if webhook_info and webhook_info.url == full_webhook_url:
@@ -894,6 +938,8 @@ async def lifespan(app: Starlette):
894
 
895
  except Exception as startup_err:
896
  logger.critical(f"CRITICAL ERROR during ASGI application startup: {startup_err}", exc_info=True)
 
 
897
  raise
898
  finally:
899
  # --- Shutdown Sequence ---
@@ -954,7 +1000,6 @@ async def webhook() -> Response:
954
 
955
  logger.debug("Webhook endpoint received POST request from Telegram.")
956
  try:
957
- # Use Flask's synchronous get_json within the async route via WSGIMiddleware
958
  update_data = request.get_json()
959
  if not update_data:
960
  logger.warning("Received empty or non-JSON data on webhook.")
@@ -964,10 +1009,13 @@ async def webhook() -> Response:
964
  logger.debug(f"Processing update_id: {update.update_id} via webhook route.")
965
 
966
  # Let PTB's dispatcher handle the update asynchronously
 
 
967
  await ptb_app.process_update(update)
968
 
969
- logger.debug(f"Finished processing update_id: {update.update_id}")
970
- return Response('ok', status=200) # Acknowledge receipt to Telegram
 
971
 
972
  except json.JSONDecodeError:
973
  logger.error("Failed to decode JSON from Telegram webhook request.", exc_info=True)
 
1
+ # main.py (Revised: Use asyncio.create_task for callback processing)
2
  import os
3
  import re
4
  import logging
 
6
  import json
7
  import html # For unescaping HTML entities
8
  import contextlib # For async context manager (lifespan)
9
+ import traceback # For logging exceptions in tasks
10
 
11
  # --- Frameworks ---
12
  from flask import Flask, request, Response # Core web routes
 
15
  from starlette.middleware.wsgi import WSGIMiddleware # Wrap Flask for Starlette
16
 
17
  # --- Telegram Bot ---
18
+ from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup, Bot # Import Bot
19
  from telegram.ext import (
20
  Application,
21
  CommandHandler,
 
25
  CallbackQueryHandler,
26
  )
27
  from telegram.constants import ParseMode
28
+ from telegram.error import NetworkError, RetryAfter, TimedOut, BadRequest # Import BadRequest
29
+ from telegram.request import HTTPXRequest
30
 
31
  # --- Other Libraries ---
32
+ import httpx
33
  from youtube_transcript_api import YouTubeTranscriptApi
34
  import requests
35
  from bs4 import BeautifulSoup
 
44
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
45
  level=logging.DEBUG
46
  )
 
47
  logging.getLogger("httpx").setLevel(logging.WARNING)
48
  if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
49
  logging.getLogger("telegram.ext").setLevel(logging.INFO)
 
52
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
53
  logging.getLogger('uvicorn').setLevel(logging.INFO)
54
  logging.getLogger('starlette').setLevel(logging.INFO)
 
55
  logger = logging.getLogger(__name__)
56
  logger.info("Logging configured.")
57
 
 
78
  # (Keep ALL your functions: is_youtube_url, extract_youtube_id,
79
  # get_transcript_via_supadata, get_transcript_via_apify,
80
  # get_youtube_transcript, get_website_content_via_requests,
81
+ # get_website_content_via_urltotext_api, generate_summary - unchanged)
82
 
83
  # Helper Functions
84
  def is_youtube_url(url):
 
500
  except Exception as e: logger.error(f"Unexpected error occurred within generate_summary function: {e}", exc_info=True); return "Sorry, an unexpected internal error occurred while generating the summary."
501
 
502
 
503
+ # --- Background Task Processing ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
505
+ async def process_summary_task(
506
+ user_id: int,
507
+ chat_id: int,
508
+ message_id_to_edit: int,
509
+ url: str,
510
+ summary_type: str,
511
+ bot: Bot # Pass the Bot instance
512
+ ) -> None:
513
+ """Handles the actual fetching and summarization in a background task."""
514
+ task_id = asyncio.current_task().get_name() if hasattr(asyncio.current_task(), 'get_name') else id(asyncio.current_task())
515
+ logger.info(f"[Task {task_id}] Starting processing for user {user_id}, chat {chat_id}, msg {message_id_to_edit}, type '{summary_type}'")
516
 
517
+ # Fetch current API keys within the task
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
  current_openrouter_key = os.environ.get('OPENROUTER_API_KEY')
519
  current_urltotext_key = os.environ.get('URLTOTEXT_API_KEY')
520
  current_supadata_key = os.environ.get('SUPADATA_API_KEY')
521
  current_apify_token = os.environ.get('APIFY_API_TOKEN')
522
+ # Keys check (already done in handler, but good for task log context)
523
  keys_present = f"OR={'Y' if current_openrouter_key else 'N'}, UTT={'Y' if current_urltotext_key else 'N'}, SD={'Y' if current_supadata_key else 'N'}, AP={'Y' if current_apify_token else 'N'}"
524
+ logger.debug(f"[Task {task_id}] API Key check: {keys_present}")
525
 
526
  if not current_openrouter_key:
527
+ logger.error(f"[Task {task_id}] CRITICAL: OpenRouter API key is missing. Cannot generate summary.")
528
  try:
529
+ # Edit the original message to show the config error
530
+ await bot.edit_message_text(
531
+ chat_id=chat_id,
532
+ message_id=message_id_to_edit,
533
+ text="❌ Configuration Error: The AI summarization service is not configured correctly. Please contact the administrator."
534
+ )
535
  except Exception as edit_err:
536
+ logger.error(f"[Task {task_id}] Failed to edit message for missing AI key: {edit_err}")
537
+ return # Stop task
538
 
539
+ # --- Inform User Processing Has Started ---
540
  processing_message_text = f"⏳ Working on your '{summary_type}' summary for the link...\n_(This might take up to a minute depending on the content)_"
541
+ status_message_sent_id = None # Track if we sent a separate message
 
542
 
 
543
  try:
544
+ await bot.edit_message_text(
545
+ chat_id=chat_id,
546
+ message_id=message_id_to_edit,
547
+ text=processing_message_text
548
+ )
549
+ logger.debug(f"[Task {task_id}] Successfully edited message {message_id_to_edit} to 'Working...'")
550
+ except (TimedOut, NetworkError, BadRequest) as e: # Catch specific Telegram errors
551
+ # If editing fails (message too old, deleted, bot blocked, rate limit, etc.)
552
+ logger.warning(f"[Task {task_id}] Could not edit original message {message_id_to_edit}: {e!r}. Sending new status message.")
553
+ message_id_to_edit = None # Mark original message as uneditable/not-to-be-deleted later
 
554
  try:
555
+ status_message = await bot.send_message(chat_id=chat_id, text=processing_message_text)
556
+ status_message_sent_id = status_message.message_id
557
+ logger.debug(f"[Task {task_id}] Sent new status message {status_message_sent_id}.")
 
558
  except Exception as send_err:
559
+ logger.error(f"[Task {task_id}] Failed sending NEW 'Working...' status message: {send_err}. Processing continues without feedback.")
560
+ # Proceed, but user gets no feedback
561
  except Exception as e:
562
+ # Catch unexpected errors during editing
563
+ logger.error(f"[Task {task_id}] Unexpected error editing message {message_id_to_edit}: {e!r}", exc_info=True)
564
+ message_id_to_edit = None # Assume original message is problematic
 
 
 
 
 
 
 
565
 
566
+ # --- Main Content Fetching and Summarization ---
567
  content = None
568
  user_feedback_message = None
569
  success = False
570
 
 
571
  try:
572
+ # Send 'typing' action to indicate activity
573
  try:
574
+ logger.debug(f"[Task {task_id}] Sending 'typing' chat action to chat {chat_id}")
575
+ await bot.send_chat_action(chat_id=chat_id, action='typing')
576
+ except Exception as ca_err:
577
+ logger.warning(f"[Task {task_id}] Failed sending 'typing' action: {ca_err}")
578
 
579
+ # --- Determine Content Type and Fetch ---
580
  is_yt = is_youtube_url(url)
581
+ logger.debug(f"[Task {task_id}] URL ({url}) is YouTube: {is_yt}")
582
 
583
  if is_yt:
584
  video_id = extract_youtube_id(url)
585
  if video_id:
586
+ logger.info(f"[Task {task_id}] Fetching YouTube transcript for video ID: {video_id}")
587
  content = await get_youtube_transcript(video_id, url, current_supadata_key, current_apify_token)
588
  if not content:
589
+ logger.warning(f"[Task {task_id}] Failed to get YouTube transcript for {video_id}.")
590
  user_feedback_message = "⚠️ Sorry, I couldn't retrieve the transcript for that YouTube video. It might be unavailable, private, have captions disabled, or an error occurred."
591
  else:
592
+ logger.info(f"[Task {task_id}] Successfully fetched YouTube transcript for {video_id}. Length: {len(content)}")
593
  else:
594
+ logger.warning(f"[Task {task_id}] Failed to extract YouTube video ID from URL: {url}")
595
  user_feedback_message = "⚠️ Sorry, I couldn't identify a valid YouTube video ID in the link you provided."
596
  else:
597
+ # --- Website Scraping ---
598
+ logger.info(f"[Task {task_id}] Attempting website scrape (Requests/BS4) for URL: {url}")
599
  content = await get_website_content_via_requests(url)
600
  if content:
601
+ logger.info(f"[Task {task_id}] Website scrape successful (Requests/BS4). Length: {len(content)}")
602
  else:
603
+ logger.warning(f"[Task {task_id}] Primary website scrape failed for {url}. Trying fallback API.")
604
  if current_urltotext_key:
605
+ try: await bot.send_chat_action(chat_id=chat_id, action='typing'); logger.debug("[Task {task_id}] Sent typing before fallback scrape.")
606
  except: pass
607
 
608
+ logger.info(f"[Task {task_id}] Attempting website scrape via URLToText API for: {url}")
609
  content = await get_website_content_via_urltotext_api(url, current_urltotext_key)
610
  if content:
611
+ logger.info(f"[Task {task_id}] Website scrape successful via URLToText API. Length: {len(content)}")
612
  else:
613
+ logger.warning(f"[Task {task_id}] Fallback website scrape (URLToText API) also failed for {url}.")
614
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website using available methods. It might be protected or structured in a way I can't parse."
615
  else:
616
+ logger.warning(f"[Task {task_id}] Primary scrape failed and URLToText API key not configured. Cannot fallback for {url}.")
617
  user_feedback_message = "⚠️ Sorry, I couldn't fetch the content from that website, and the fallback service isn't configured."
618
 
619
+ # --- Generate Summary if Content Was Fetched ---
620
  if content:
621
+ logger.info(f"[Task {task_id}] Content fetched (Length: {len(content)}). Generating '{summary_type}' summary.")
622
+ try: await bot.send_chat_action(chat_id=chat_id, action='typing'); logger.debug("[Task {task_id}] Sent typing before AI summary generation.")
623
  except: pass
624
 
625
  summary = await generate_summary(content, summary_type, current_openrouter_key)
626
 
627
  if summary.startswith("Error:") or summary.startswith("Sorry,"):
628
+ logger.warning(f"[Task {task_id}] AI summary generation failed. Reason: {summary}")
629
  user_feedback_message = f"⚠️ {summary}"
630
  else:
631
+ # --- Summary Success - Send to User ---
632
+ logger.info(f"[Task {task_id}] Summary generated successfully. Length: {len(summary)}. Sending result.")
633
  try:
634
+ await bot.send_message(
635
+ chat_id=chat_id,
636
  text=summary,
637
  parse_mode=ParseMode.MARKDOWN,
638
  link_preview_options={'is_disabled': True}
639
  )
640
  success = True
641
  user_feedback_message = None
642
+ logger.info(f"[Task {task_id}] Successfully sent summary to chat {chat_id}.")
 
 
 
 
643
  except Exception as send_final_err:
644
+ logger.error(f"[Task {task_id}] Failed sending final summary to chat {chat_id}: {send_final_err}", exc_info=True)
645
  user_feedback_message = "⚠️ Sorry, an unexpected error occurred while sending the final summary."
646
  success = False
647
 
648
  elif not user_feedback_message:
649
+ logger.warning(f"[Task {task_id}] Content retrieval resulted in None, but no specific user feedback message was set. URL: {url}")
650
  user_feedback_message = "⚠️ Sorry, I couldn't retrieve any usable content from the link provided."
651
 
652
+ # --- Send Final Feedback Message if Processing Failed ---
653
  if user_feedback_message and not success:
654
+ logger.warning(f"[Task {task_id}] Processing failed or summary sending failed. Sending feedback: {user_feedback_message}")
655
  try:
656
+ await bot.send_message(chat_id=chat_id, text=user_feedback_message)
 
 
657
  except Exception as send_feedback_err:
658
+ logger.error(f"[Task {task_id}] Failed sending final FAILURE feedback message to chat {chat_id}: {send_feedback_err}")
659
 
660
  except Exception as e:
661
+ # Catch-all for unexpected errors during the main processing block
662
+ logger.error(f"[Task {task_id}] Unexpected critical error during task processing for user {user_id}, URL {url}: {e}", exc_info=True)
663
  try:
664
+ await bot.send_message(chat_id=chat_id, text="❌ Oops! An unexpected internal error occurred while processing your request. The issue has been logged.")
 
 
665
  except Exception as final_err:
666
+ logger.error(f"[Task {task_id}] Failed sending CRITICAL internal error feedback message to chat {chat_id}: {final_err}")
667
  success = False
668
 
669
  finally:
670
+ # --- Clean up Status Message(s) ---
671
+ logger.debug(f"[Task {task_id}] Cleaning up status message(s). Success={success}")
672
  try:
673
+ if status_message_sent_id:
674
+ # If we sent a separate "Working..." message, delete it.
675
+ await bot.delete_message(chat_id=chat_id, message_id=status_message_sent_id)
676
+ logger.debug(f"[Task {task_id}] Deleted separate status message {status_message_sent_id}.")
677
+ elif message_id_to_edit:
678
+ # If we successfully edited the original message...
679
  if success:
680
+ # If processing succeeded, delete the original "Working..." message.
681
+ await bot.delete_message(chat_id=chat_id, message_id=message_id_to_edit)
682
+ logger.debug(f"[Task {task_id}] Processing succeeded. Deleted original (edited) message {message_id_to_edit}.")
683
  else:
684
+ # If processing failed, edit the message to show failure (if possible)
685
+ logger.debug(f"[Task {task_id}] Processing failed. Attempting to edit message {message_id_to_edit} to show error.")
686
+ final_error_text = user_feedback_message or "❌ An error occurred."
687
+ try:
688
+ await bot.edit_message_text(chat_id=chat_id, message_id=message_id_to_edit, text=final_error_text[:4090]) # Truncate error message if needed
689
+ except Exception as final_edit_err:
690
+ logger.warning(f"[Task {task_id}] Could not edit message {message_id_to_edit} to show final error state: {final_edit_err!r}")
691
+ # If message_id_to_edit is None, we couldn't edit it initially, and if status_message_sent_id is None, we failed to send a new one. Nothing to clean up.
692
 
 
 
693
  except Exception as del_e:
694
+ logger.warning(f"[Task {task_id}] Could not delete status/button message during cleanup: {del_e!r}")
695
 
696
+ logger.info(f"[Task {task_id}] Finished task processing for user {user_id}. Overall Success: {success}")
697
+
698
+ # --- Telegram Bot Handlers ---
699
+
700
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle the /start command by greeting the user with a short usage intro.

    Args:
        update: Incoming Telegram update that carries the /start command.
        context: PTB callback context (not used by this handler).

    The greeting is sent with HTML parsing enabled, so every user-controlled
    fragment interpolated into it has to be HTML-safe.
    """
    user = update.effective_user
    # CommandHandler also receives edited messages, for which update.message is
    # None; effective_message covers both the new and the edited case.
    message = update.effective_message
    if not user or not message:
        return
    logger.info(f"User {user.id} ({user.username or 'NoUsername'}) initiated /start.")
    # The plain first-name fallback is raw user input: escape it, otherwise a
    # name containing '<' or '&' makes Telegram reject the HTML message.
    mention = user.mention_html() if user.username else html.escape(user.first_name)
    start_message = (
        f"👋 Hello {mention}!\n\n"
        "I can summarise YouTube videos or web articles for you.\n\n"
        "Just send me a link (URL) and I'll ask you whether you want the summary as a paragraph or bullet points.\n\n"
        "Type /help for more details."
    )
    await message.reply_html(start_message)
713
+
714
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle the /help command by replying with usage instructions.

    Args:
        update: Incoming Telegram update that carries the /help command.
        context: PTB callback context (not used by this handler).
    """
    user = update.effective_user
    logger.info(f"User {user.id if user else '?'} requested /help.")
    # CommandHandler also receives edited messages, for which update.message is
    # None; effective_message covers both, and we bail out if neither exists.
    message = update.effective_message
    if not message:
        return
    help_text = (
        "**How to Use Me:**\n"
        "1. Send me a direct link (URL) to a YouTube video or a web article.\n"
        "2. I will ask you to choose the summary format: `Paragraph` or `Points`.\n"
        "3. Click the button for your preferred format.\n"
        "4. I'll fetch the content, summarise it using AI, and send it back to you!\n\n"
        "**Important Notes:**\n"
        "- **YouTube:** Getting transcripts can sometimes fail if they are disabled, unavailable for the video's language, or if YouTube temporarily blocks requests.\n"
        "- **Websites:** I do my best to extract the main article content, but complex websites (especially those heavily reliant on JavaScript or with strong anti-scraping measures) might not work perfectly. I have a fallback service to help with tricky sites.\n"
        "- **AI Summaries:** The AI tries its best to be accurate and follow the requested format, but errors or unexpected outputs are possible.\n"
        "- **Length:** Very long articles or videos might be truncated before summarization to fit within processing limits.\n\n"
        "Just send a link to get started!"
    )
    await message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
732
+
733
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Detect a URL in a text message and ask which summary format the user wants.

    The first http(s) URL found in the message is stored in
    ``context.user_data['url_to_summarize']`` and an inline keyboard with the
    two summary formats is sent back. Messages without a URL get a short hint
    unless they look like a command.

    Args:
        update: Incoming Telegram update; ignored when it has no text message.
        context: PTB callback context whose ``user_data`` receives the URL.
    """
    if not update.message or not update.message.text:
        return
    message_text = update.message.text.strip()
    user = update.effective_user
    if not user:
        return

    # Host labels follow the DNS limit of 63 characters, including the TLD
    # (long TLDs such as ".photography" exist). An explicit port is accepted,
    # and the remainder may start with '/', '?' or '#' so that query strings
    # and fragments directly after the host are not cut off.
    url_pattern = (
        r'https?://(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,63}'
        r'(?::\d{1,5})?(?:[/?#][^\s]*)?'
    )
    match = re.search(url_pattern, message_text)

    if match:
        url = match.group(0)
        logger.info(f"User {user.id} sent potential URL: {url}")
        # NOTE(review): only one URL is kept per user, so pressing the button of
        # an older prompt summarises the most recently sent link.
        context.user_data['url_to_summarize'] = url
        logger.debug(f"Stored URL '{url}' in user_data for user {user.id}")

        keyboard = [
            [
                InlineKeyboardButton("📜 Paragraph Summary", callback_data="paragraph"),
                InlineKeyboardButton("🔹 Bullet Points", callback_data="points")
            ]
        ]
        reply_markup = InlineKeyboardMarkup(keyboard)
        await update.message.reply_text(
            f"✅ Link received:\n`{url}`\n\nChoose your desired summary format:",
            reply_markup=reply_markup,
            parse_mode=ParseMode.MARKDOWN,
            link_preview_options={'is_disabled': True}
        )
    else:
        # Stay silent for anything that looks like a command.
        if not message_text.startswith('/'):
            await update.message.reply_text("Please send me a valid URL (starting with http:// or https://) to summarize.")
765
+
766
+
767
# Strong references to in-flight summary tasks. The event loop only keeps weak
# references to tasks, so a task nobody else holds may be garbage-collected
# before it finishes.
_background_summary_tasks = set()


async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle a summary-format button press by scheduling the background task.

    Reads the chosen format from the callback data and the pending URL from
    ``context.user_data``, removes the URL from the context, and schedules
    ``process_summary_task`` without awaiting it so the handler returns
    immediately.

    Args:
        update: Incoming Telegram update carrying the callback query.
        context: PTB callback context providing ``user_data`` and the bot.
    """
    query = update.callback_query
    if not query or not query.from_user or not query.message:
        logger.warning("Callback query, user, or message missing in update.")
        # Try answering query even if message is missing, just to acknowledge
        if query:
            try:
                await query.answer("Error: Missing information.")
            except Exception as answer_err:
                # Best effort only: acknowledging the query is not essential.
                logger.debug(f"Could not answer incomplete callback query: {answer_err!r}")
        return
    user = query.from_user

    # We skip query.answer() here to avoid potential connection issues.
    # The button might stay loading visually for the user.

    summary_type = query.data
    url = context.user_data.get('url_to_summarize')
    query_id = query.id  # For logging

    logger.info(f"User {user.id} chose summary type '{summary_type}'. Query ID: {query_id}. Checking for stored URL.")

    if not url:
        logger.warning(f"User {user.id} (Query {query_id}) pressed button '{summary_type}', but NO URL found in user_data context.")
        try:
            await query.edit_message_text(text="⚠️ Oops! I lost the context for that link. Please send the link again.")
        except Exception as edit_err:
            logger.error(f"Failed to edit message for lost context (Query {query_id}): {edit_err}")
            # Still try to answer the query if editing failed
            try:
                await query.answer("Error processing request.")
            except Exception as answer_err:
                logger.debug(f"Could not answer callback query {query_id}: {answer_err!r}")
        return

    logger.info(f"Scheduling background task for URL '{url}' (User {user.id}, Query {query_id}, Type '{summary_type}').")

    # Extract necessary IDs before clearing data
    user_id = user.id
    chat_id = query.message.chat_id
    message_id_to_edit = query.message.message_id
    bot_instance = context.bot  # Get the bot instance from context

    # Clear the URL from context *before* scheduling the task
    context.user_data.pop('url_to_summarize', None)
    logger.debug(f"Cleared URL from user_data for user {user_id} (Query {query_id})")

    # Schedule the actual processing function to run in the background,
    # passing all required data explicitly.
    task = asyncio.create_task(
        process_summary_task(
            user_id=user_id,
            chat_id=chat_id,
            message_id_to_edit=message_id_to_edit,
            url=url,
            summary_type=summary_type,
            bot=bot_instance
        ),
        name=f"SummaryTask-{user_id}-{message_id_to_edit}"  # Optional: name the task
    )
    # Hold the task until it completes, then drop the reference again.
    _background_summary_tasks.add(task)
    task.add_done_callback(_background_summary_tasks.discard)

    # Log that the task was scheduled and the handler is returning.
    logger.debug(f"Callback handler for Query {query_id} finished after scheduling task.")
    # DO NOT await the task here. Let the handler return immediately.
828
 
829
 
830
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
831
+ """Log Errors caused by Updates or background tasks."""
832
+ # Check if the error is from an Exception raised in a handler
833
+ if context.error:
834
+ logger.error(f"Exception while handling an update: {context.error}", exc_info=context.error)
835
+ if isinstance(context.error, TimedOut):
836
+ logger.warning("A timeout error occurred in PTB communication.")
837
+ elif isinstance(context.error, NetworkError):
838
+ logger.warning(f"A network error occurred: {context.error}")
839
+ # Add more specific error handling if needed
840
+ else:
841
+ # Log errors from background tasks if PTB captures them this way (might need custom handling)
842
+ logger.error(f"Unknown error occurred. Update: {update} | Context: {context}")
843
+
844
+
845
+ # --- Bot Setup Function ---
846
  async def setup_bot_config() -> Application:
847
  """Configures the PTB Application with custom HTTPX settings for PTB v20.x."""
848
  logger.info("Configuring Telegram Application...")
 
870
 
871
  application_builder = Application.builder().token(TELEGRAM_TOKEN)
872
  application_builder.request(custom_request)
 
873
  application = application_builder.build()
874
 
875
  application.add_handler(CommandHandler("start", start))
876
  application.add_handler(CommandHandler("help", help_command))
877
  application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
878
+ application.add_handler(CallbackQueryHandler(handle_summary_type_callback)) # This handler now just schedules the task
879
  application.add_error_handler(error_handler)
880
 
881
  logger.info("Telegram application handlers configured.")
 
895
  logger.info("PTB Application initialized. Starting background tasks (e.g., job queue)...")
896
  await ptb_app.start() # Starts dispatcher, job queue, etc. but NOT polling
897
 
 
 
898
  bot_instance = ptb_app.bot
899
  bot_info = await bot_instance.get_me()
900
  logger.info(f"PTB Application started successfully. Bot ID: {bot_info.id}, Username: @{bot_info.username}")
 
911
  await bot_instance.set_webhook(
912
  url=full_webhook_url,
913
  allowed_updates=Update.ALL_TYPES,
914
+ # drop_pending_updates=True # Consider adding this if startup issues persist
915
  )
916
  webhook_info = await bot_instance.get_webhook_info()
917
  if webhook_info and webhook_info.url == full_webhook_url:
 
938
 
939
  except Exception as startup_err:
940
  logger.critical(f"CRITICAL ERROR during ASGI application startup: {startup_err}", exc_info=True)
941
+ # Log traceback explicitly before raising might help in some environments
942
+ traceback.print_exc()
943
  raise
944
  finally:
945
  # --- Shutdown Sequence ---
 
1000
 
1001
  logger.debug("Webhook endpoint received POST request from Telegram.")
1002
  try:
 
1003
  update_data = request.get_json()
1004
  if not update_data:
1005
  logger.warning("Received empty or non-JSON data on webhook.")
 
1009
  logger.debug(f"Processing update_id: {update.update_id} via webhook route.")
1010
 
1011
  # Let PTB's dispatcher handle the update asynchronously
1012
+ # This will now call the appropriate handler (e.g., handle_summary_type_callback)
1013
+ # which will *quickly* schedule the background task and return.
1014
  await ptb_app.process_update(update)
1015
 
1016
+ logger.debug(f"Finished processing update_id: {update.update_id} in webhook handler (task scheduled).")
1017
+ # Return 200 OK immediately to Telegram
1018
+ return Response('ok', status=200)
1019
 
1020
  except json.JSONDecodeError:
1021
  logger.error("Failed to decode JSON from Telegram webhook request.", exc_info=True)