fmab777 committed on
Commit
1fb5a74
·
verified ·
1 Parent(s): 9466c05

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +49 -18
main.py CHANGED
@@ -857,32 +857,63 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
857
  if video_id: content = await get_youtube_transcript(video_id, url)
858
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
859
  if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
860
- else:
 
861
  global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN, _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists
862
 
863
- logger.info(f"[Task {task_id}] Trying Web Scrape Method 1 (Direct Fetch + BS4)..."); content = await get_website_content(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
864
  if not content:
865
- logger.warning(f"[Task {task_id}] Method 1 failed. Trying Method 2 (urltotext.com)...")
866
- if _urltotext_key_exists: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing'); content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
867
- else: logger.warning("[Task {task_id}] Method 2 (urltotext.com) API key unavailable. Skipping.")
 
 
 
 
 
868
  if not content:
869
- logger.warning(f"[Task {task_id}] Method 2 failed. Trying Method 3 (Scraper's Proxy)...")
870
- if _rapidapi_key_exists: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing'); content = await get_website_content_via_scrapers_proxy(url, RAPIDAPI_KEY)
871
- else: logger.warning("[Task {task_id}] Method 3 (Scraper's Proxy) RapidAPI key unavailable. Skipping.")
 
 
 
 
 
872
  if not content:
873
  logger.warning(f"[Task {task_id}] Method 3 failed. Trying Method 4 (AI Web Scraper)...")
874
- if _rapidapi_key_exists: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing'); content = await get_website_content_via_ai_web_scraper(url, RAPIDAPI_KEY)
875
- else: logger.warning("[Task {task_id}] Method 4 (AI Web Scraper) RapidAPI key unavailable. Skipping.")
876
- if not content:
877
- logger.warning(f"[Task {task_id}] Method 4 failed. Trying Method 5 (Apify Crawler)...")
878
- if _apify_token_exists: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing'); content = await get_website_content_via_apify_crawler(url, APIFY_API_TOKEN)
879
- else: logger.warning("[Task {task_id}] Method 5 (Apify Crawler) APIFY_API_TOKEN unavailable. Skipping.")
 
880
  if not content:
881
- logger.warning(f"[Task {task_id}] Method 5 failed. Trying Method 6 (Apify Text Scraper)...")
882
- if _apify_token_exists: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing'); content = await get_website_content_via_apify_text_scraper(url, APIFY_API_TOKEN)
883
- else: logger.warning("[Task {task_id}] Method 6 (Apify Text Scraper) APIFY_API_TOKEN unavailable. Skipping.")
 
 
 
884
 
885
- if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't fetch readable content from that website using multiple methods (blocked/dynamic content/empty?)."
 
 
 
886
 
887
  # --- 3. Summarization ---
888
  if content:
 
857
  if video_id: content = await get_youtube_transcript(video_id, url)
858
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
859
  if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
860
+ else:
861
+ # Website URL processing with NEW fallback order: 1 -> 5 -> 6 -> 3 -> 4 -> 2
862
  global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN, _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists
863
 
864
+ # --- Method 1: Direct Fetch + BS4 (Primary) ---
865
+ logger.info(f"[Task {task_id}] Trying Web Scrape Method 1 (Direct Fetch + BS4)...");
866
+ content = await get_website_content(url)
867
+
868
+ # --- Method 5: Apify Crawler (Fallback 1) ---
869
+ if not content:
870
+ logger.warning(f"[Task {task_id}] Method 1 failed. Trying Method 5 (Apify Crawler)...")
871
+ if _apify_token_exists:
872
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
873
+ content = await get_website_content_via_apify_crawler(url, APIFY_API_TOKEN)
874
+ else:
875
+ logger.warning("[Task {task_id}] Method 5 (Apify Crawler) APIFY_API_TOKEN unavailable. Skipping.")
876
+
877
+ # --- Method 6: Apify Text Scraper (Fallback 2) ---
878
  if not content:
879
+ logger.warning(f"[Task {task_id}] Method 5 failed. Trying Method 6 (Apify Text Scraper)...")
880
+ if _apify_token_exists:
881
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
882
+ content = await get_website_content_via_apify_text_scraper(url, APIFY_API_TOKEN)
883
+ else:
884
+ logger.warning("[Task {task_id}] Method 6 (Apify Text Scraper) APIFY_API_TOKEN unavailable. Skipping.")
885
+
886
+ # --- Method 3: RapidAPI - Scraper's Proxy (Fallback 3) ---
887
  if not content:
888
+ logger.warning(f"[Task {task_id}] Method 6 failed. Trying Method 3 (Scraper's Proxy)...")
889
+ if _rapidapi_key_exists:
890
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
891
+ content = await get_website_content_via_scrapers_proxy(url, RAPIDAPI_KEY)
892
+ else:
893
+ logger.warning("[Task {task_id}] Method 3 (Scraper's Proxy) RapidAPI key unavailable. Skipping.")
894
+
895
+ # --- Method 4: RapidAPI - AI Web Scraper (Fallback 4) ---
896
  if not content:
897
  logger.warning(f"[Task {task_id}] Method 3 failed. Trying Method 4 (AI Web Scraper)...")
898
+ if _rapidapi_key_exists:
899
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
900
+ content = await get_website_content_via_ai_web_scraper(url, RAPIDAPI_KEY)
901
+ else:
902
+ logger.warning("[Task {task_id}] Method 4 (AI Web Scraper) RapidAPI key unavailable. Skipping.")
903
+
904
+ # --- Method 2: urltotext.com API (Fallback 5) ---
905
  if not content:
906
+ logger.warning(f"[Task {task_id}] Method 4 failed. Trying Method 2 (urltotext.com)...")
907
+ if _urltotext_key_exists:
908
+ await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
909
+ content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
910
+ else:
911
+ logger.warning("[Task {task_id}] Method 2 (urltotext.com) API key unavailable. Skipping.")
912
 
913
+ # --- Final check after all attempts ---
914
+ if not content and not user_feedback_message:
915
+ logger.error(f"[Task {task_id}] All website fetching methods (1, 5, 6, 3, 4, 2) failed for {url}")
916
+ user_feedback_message = "Sorry, I couldn't fetch readable content from that website using multiple methods (blocked/dynamic content/empty?)."
917
 
918
  # --- 3. Summarization ---
919
  if content: