Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -857,32 +857,63 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
|
|
857 |
if video_id: content = await get_youtube_transcript(video_id, url)
|
858 |
else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
|
859 |
if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
|
860 |
-
|
|
|
861 |
global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN, _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists
|
862 |
|
863 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
864 |
if not content:
|
865 |
-
logger.warning(f"[Task {task_id}] Method
|
866 |
-
if
|
867 |
-
|
|
|
|
|
|
|
|
|
|
|
868 |
if not content:
|
869 |
-
logger.warning(f"[Task {task_id}] Method
|
870 |
-
if _rapidapi_key_exists:
|
871 |
-
|
|
|
|
|
|
|
|
|
|
|
872 |
if not content:
|
873 |
logger.warning(f"[Task {task_id}] Method 3 failed. Trying Method 4 (AI Web Scraper)...")
|
874 |
-
if _rapidapi_key_exists:
|
875 |
-
|
876 |
-
|
877 |
-
|
878 |
-
|
879 |
-
|
|
|
880 |
if not content:
|
881 |
-
logger.warning(f"[Task {task_id}] Method
|
882 |
-
if
|
883 |
-
|
|
|
|
|
|
|
884 |
|
885 |
-
|
|
|
|
|
|
|
886 |
|
887 |
# --- 3. Summarization ---
|
888 |
if content:
|
|
|
857 |
if video_id: content = await get_youtube_transcript(video_id, url)
|
858 |
else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
|
859 |
if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
|
860 |
+
else:
|
861 |
+
# Website URL processing with NEW fallback order: 1 -> 5 -> 6 -> 3 -> 4 -> 2
|
862 |
global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN, _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists
|
863 |
|
864 |
+
# --- Method 1: Direct Fetch + BS4 (Primary) ---
|
865 |
+
logger.info(f"[Task {task_id}] Trying Web Scrape Method 1 (Direct Fetch + BS4)...");
|
866 |
+
content = await get_website_content(url)
|
867 |
+
|
868 |
+
# --- Method 5: Apify Crawler (Fallback 1) ---
|
869 |
+
if not content:
|
870 |
+
logger.warning(f"[Task {task_id}] Method 1 failed. Trying Method 5 (Apify Crawler)...")
|
871 |
+
if _apify_token_exists:
|
872 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
873 |
+
content = await get_website_content_via_apify_crawler(url, APIFY_API_TOKEN)
|
874 |
+
else:
|
875 |
+
logger.warning(f"[Task {task_id}] Method 5 (Apify Crawler) APIFY_API_TOKEN unavailable. Skipping.")
|
876 |
+
|
877 |
+
# --- Method 6: Apify Text Scraper (Fallback 2) ---
|
878 |
if not content:
|
879 |
+
logger.warning(f"[Task {task_id}] Method 5 failed. Trying Method 6 (Apify Text Scraper)...")
|
880 |
+
if _apify_token_exists:
|
881 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
882 |
+
content = await get_website_content_via_apify_text_scraper(url, APIFY_API_TOKEN)
|
883 |
+
else:
|
884 |
+
logger.warning(f"[Task {task_id}] Method 6 (Apify Text Scraper) APIFY_API_TOKEN unavailable. Skipping.")
|
885 |
+
|
886 |
+
# --- Method 3: RapidAPI - Scraper's Proxy (Fallback 3) ---
|
887 |
if not content:
|
888 |
+
logger.warning(f"[Task {task_id}] Method 6 failed. Trying Method 3 (Scraper's Proxy)...")
|
889 |
+
if _rapidapi_key_exists:
|
890 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
891 |
+
content = await get_website_content_via_scrapers_proxy(url, RAPIDAPI_KEY)
|
892 |
+
else:
|
893 |
+
logger.warning(f"[Task {task_id}] Method 3 (Scraper's Proxy) RapidAPI key unavailable. Skipping.")
|
894 |
+
|
895 |
+
# --- Method 4: RapidAPI - AI Web Scraper (Fallback 4) ---
|
896 |
if not content:
|
897 |
logger.warning(f"[Task {task_id}] Method 3 failed. Trying Method 4 (AI Web Scraper)...")
|
898 |
+
if _rapidapi_key_exists:
|
899 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
900 |
+
content = await get_website_content_via_ai_web_scraper(url, RAPIDAPI_KEY)
|
901 |
+
else:
|
902 |
+
logger.warning(f"[Task {task_id}] Method 4 (AI Web Scraper) RapidAPI key unavailable. Skipping.")
|
903 |
+
|
904 |
+
# --- Method 2: urltotext.com API (Fallback 5) ---
|
905 |
if not content:
|
906 |
+
logger.warning(f"[Task {task_id}] Method 4 failed. Trying Method 2 (urltotext.com)...")
|
907 |
+
if _urltotext_key_exists:
|
908 |
+
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
909 |
+
content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
|
910 |
+
else:
|
911 |
+
logger.warning(f"[Task {task_id}] Method 2 (urltotext.com) API key unavailable. Skipping.")
|
912 |
|
913 |
+
# --- Final check after all attempts ---
|
914 |
+
if not content and not user_feedback_message:
|
915 |
+
logger.error(f"[Task {task_id}] All website fetching methods (1, 5, 6, 3, 4, 2) failed for {url}")
|
916 |
+
user_feedback_message = "Sorry, I couldn't fetch readable content from that website using multiple methods (blocked/dynamic content/empty?)."
|
917 |
|
918 |
# --- 3. Summarization ---
|
919 |
if content:
|