Update main.py
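Adds Google Gemini (via google-generativeai) as a fallback summariser: new GEMINI_API_KEY / GEMINI_MODEL configuration, a generate_summary_gemini() helper, a fallback from OpenRouter to Gemini when the primary request hits a read timeout, and a health check that now reports both the primary and fallback models.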
main.py
CHANGED
@@ -1,4 +1,4 @@
-# main.py (
+# main.py (Adding Gemini Fallback)
 import os
 import re
 import logging
@@ -40,6 +40,19 @@ try:
 except ImportError:
     DEFAULT_PARSER = 'html.parser'

+# --- Google Gemini ---
+try:
+    import google.generativeai as genai
+    from google.generativeai.types import HarmCategory, HarmBlockThreshold
+    _gemini_available = True
+except ImportError:
+    genai = None
+    HarmCategory = None
+    HarmBlockThreshold = None
+    _gemini_available = False
+    logger.warning("google-generativeai library not found. Gemini fallback disabled.")
+
+
 # --- Logging Setup ---
 logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
 logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -49,6 +62,8 @@ logging.getLogger("urllib3").setLevel(logging.INFO)
 logging.getLogger('gunicorn.error').setLevel(logging.INFO)
 logging.getLogger('uvicorn').setLevel(logging.INFO)
 logging.getLogger('starlette').setLevel(logging.INFO)
+# Reduce Gemini log noise if needed
+if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
 logger = logging.getLogger(__name__)
 logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")

@@ -69,12 +84,19 @@ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
 SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
 APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
 WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
+GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Added Gemini Key

+# --- Model Configuration ---
 OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
-APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
+APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
+# Use latest flash model, allow override via env var
+GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")

+# --- Key Checks ---
 if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
-if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found.
+if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
+# Gemini is a fallback, so only warn if key is missing and library is available
+if _gemini_available and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback summarization via Gemini disabled.")

 if not URLTOTEXT_API_KEY: pass
 if not SUPADATA_API_KEY: pass
@@ -82,11 +104,24 @@ if not APIFY_API_TOKEN: pass
 if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")

 logger.info("Secret loading and configuration check finished.")
-logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
+logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
+if _gemini_available and GEMINI_API_KEY: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
+else: logger.info("Gemini Fallback: Disabled (library or API key missing)")
 logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")

 _apify_token_exists = bool(APIFY_API_TOKEN)
+_gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY)

+# --- Configure Gemini Client (Do this once globally if possible) ---
+if _gemini_fallback_enabled:
+    try:
+        genai.configure(api_key=GEMINI_API_KEY)
+        logger.info("Google GenAI client configured successfully.")
+    except Exception as e:
+        logger.error(f"Failed to configure Google GenAI client: {e}")
+        _gemini_fallback_enabled = False # Disable fallback if config fails
+
+# (Retry Decorator, Helper Functions remain the same)
 # --- Retry Decorator ---
 @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
 async def retry_bot_operation(func, *args, **kwargs):
@@ -108,7 +143,9 @@ def extract_youtube_id(url):
     if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
     else: logger.warning(f"Could not extract YT ID from {url}"); return None

+
 # --- Content Fetching Functions ---
+# (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
 async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
     headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
     try:
@@ -198,14 +235,8 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
         elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
         elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
         elif response.status_code == 404:
-
-            error_info
-            try:
-                error_info = response.json().get("error", {}).get("message", "")
-            except Exception:
-                pass
-            logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
-            return None
+            error_info = ""
+            try: error_info = response.json().get("error", {}).get("message", "")
+            except Exception: pass
+            logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}"); return None
         else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None

     except httpx.TimeoutException as e: logger.error(f"[Apify SyncItems] Timeout during API interaction for {video_url}: {e}"); return None
@@ -213,8 +244,6 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
     except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
     except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None

-
-# (get_youtube_transcript, get_website_content, get_website_content_via_api remain the same)
 async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
     global SUPADATA_API_KEY, APIFY_API_TOKEN
     if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
@@ -296,56 +325,138 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
     except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
     except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None

-# --- Summarization
+# --- Summarization Functions ---
+
+# --- NEW: Gemini Summarization Function ---
+async def generate_summary_gemini(text: str, summary_type: str) -> str:
+    """Generates summary using Google Gemini API (Fallback)."""
+    global GEMINI_MODEL, _gemini_fallback_enabled
+    if not _gemini_fallback_enabled:
+        logger.error("[Gemini Fallback] Called but is disabled (no library or key).")
+        return "Error: Fallback AI service not available."
+
+    logger.info(f"[Gemini Fallback] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
+
+    # Reuse the same prompts as OpenRouter/DeepSeek for consistency
+    if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "• Clear and simple language suitable for someone unfamiliar with the topic.\n" "• Uses British English spellings throughout.\n" "• Straightforward and understandable vocabulary; avoid complex terms.\n" "• Presented as ONE SINGLE PARAGRAPH.\n" "• No more than 85 words maximum; but does not have to be exactly 85.\n" "• Considers the entire text content equally.\n" "• Uses semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
+    else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "• For each distinct topic or section identified in the text, create a heading.\n" "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "• Immediately following each heading, list the key points as a bulleted list.\n" "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "• The text within each bullet point should NOT contain any bold formatting.\n" "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "• Use British English spellings throughout.\n" "• Avoid overly complex or advanced vocabulary.\n" "• Keep bullet points concise.\n" "• Ensure the entire summary takes no more than two minutes to read.\n" "• Consider the entire text's content, not just the beginning or a few topics.\n" "• Use semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
+
+    # Gemini handles large contexts well, but basic truncation is still wise
+    MAX_INPUT_LENGTH = 1000000 # Gemini Flash limit is ~1M tokens
+    if len(text) > MAX_INPUT_LENGTH:
+        logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds assumed limit ({MAX_INPUT_LENGTH}). Truncating.")
+        text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
+    full_prompt = f"{prompt}\n\n{text}"
+
+    # Configure safety settings - block potentially harmful content
+    safety_settings = {
+        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+    }
+
+    try:
+        logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
+        model = genai.GenerativeModel(GEMINI_MODEL)
+        logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
+        # Use generate_content_async for non-blocking call
+        response = await model.generate_content_async(
+            full_prompt,
+            safety_settings=safety_settings
+        )
+        logger.info("[Gemini Fallback] Received response from Gemini.")
+
+        # Check for blocks and extract text
+        if response.prompt_feedback.block_reason:
+            logger.error(f"[Gemini Fallback] Request blocked by Gemini. Reason: {response.prompt_feedback.block_reason}")
+            return f"Sorry, the content could not be summarized by the fallback AI due to safety filters ({response.prompt_feedback.block_reason})."
+
+        summary = response.text
+        if summary:
+            logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}")
+            # Apply basic markdown escaping if needed (same as OpenRouter func)
+            summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
+            return summary.strip()
+        else:
+            logger.warning(f"[Gemini Fallback] Gemini returned an empty summary. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}")
+            return "Sorry, the fallback AI model returned an empty summary."

+    except Exception as e:
+        # Catch potential API errors (e.g., google.api_core.exceptions) or others
+        logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True)
+        return "Sorry, an unexpected error occurred while using the fallback AI service."
+
+
+# --- MODIFIED: Primary Summarization Function (Adds Fallback Logic) ---
 async def generate_summary(text: str, summary_type: str) -> str:
-
-
-
+    """Generates summary using OpenRouter (Primary) with Gemini fallback on ReadTimeout."""
+    global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
+    logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
+    if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."
+
+    # Prompts (same as before)
     if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "• Clear and simple language suitable for someone unfamiliar with the topic.\n" "• Uses British English spellings throughout.\n" "• Straightforward and understandable vocabulary; avoid complex terms.\n" "• Presented as ONE SINGLE PARAGRAPH.\n" "• No more than 85 words maximum; but does not have to be exactly 85.\n" "• Considers the entire text content equally.\n" "• Uses semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
     else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "• For each distinct topic or section identified in the text, create a heading.\n" "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "• Immediately following each heading, list the key points as a bulleted list.\n" "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "• The text within each bullet point should NOT contain any bold formatting.\n" "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "• Use British English spellings throughout.\n" "• Avoid overly complex or advanced vocabulary.\n" "• Keep bullet points concise.\n" "• Ensure the entire summary takes no more than two minutes to read.\n" "• Consider the entire text's content, not just the beginning or a few topics.\n" "• Use semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
     MAX_INPUT_LENGTH = 500000
-    if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
+    if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Primary Summary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
     full_prompt = f"{prompt}\n\n{text}"
     headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
     api_timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0); response = None
+
     try:
+        # --- Attempt OpenRouter Request ---
         async with httpx.AsyncClient(timeout=api_timeouts) as client:
-            logger.info(f"Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            pass
-
-
-
-
-
+            logger.info(f"[Primary Summary] Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
+            try:
+                response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
+                if response: logger.info(f"[Primary Summary] Received response from OpenRouter. Status code: {response.status_code}")
+                else: logger.error("[Primary Summary] No response from OpenRouter (unexpected)."); return "Sorry, primary AI service failed unexpectedly."
+
+                # --- Process OpenRouter Response ---
+                if response.status_code == 200:
+                    try: # Parsing successful response
+                        data = response.json()
+                        if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
+                            message = data["choices"][0].get("message")
+                            if message and isinstance(message, dict):
+                                summary = message.get("content")
+                                if summary: logger.info(f"[Primary Summary] Success via OpenRouter. Output len: {len(summary)}"); return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
+                                else: logger.warning(f"[Primary Summary] OpenRouter success but content empty. Resp: {data}"); return "Sorry, the primary AI model returned an empty summary."
+                            else: logger.error(f"[Primary Summary] Unexpected message structure: {message}. Full: {data}"); return "Sorry, could not parse primary AI response (format)."
+                        else: logger.error(f"[Primary Summary] Unexpected choices structure: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse primary AI response (choices)."
+                    except json.JSONDecodeError: logger.error(f"[Primary Summary] Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand primary AI response."
+                    except Exception as e: logger.error(f"[Primary Summary] Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing primary AI response."
+
+                # --- Handle OpenRouter Error Status Codes ---
+                elif response.status_code == 401: logger.error("[Primary Summary] OpenRouter API key invalid (401)."); return "Error: Primary AI model configuration key is invalid."
+                elif response.status_code == 402: logger.error("[Primary Summary] OpenRouter Payment Required (402)."); return "Sorry, primary AI service limits/payment issue."
+                elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
+                elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
+                else: # Other non-200 errors
+                    error_info = ""
+                    try: error_info = response.json().get("error", {}).get("message", "")
+                    except Exception: pass
+                    logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
+                    return f"Sorry, primary AI service returned unexpected status ({response.status_code})."
+
+            # --- Catch OpenRouter Read Timeout -> Trigger Fallback ---
+            except httpx.ReadTimeout:
+                logger.warning(f"[Primary Summary] Read Timeout ({api_timeouts.read}s) waiting for OpenRouter. Attempting Gemini fallback...")
+                if _gemini_fallback_enabled:
+                    # Call the Gemini function
+                    return await generate_summary_gemini(text, summary_type)
+                else:
+                    logger.error("[Fallback Attempt] Gemini fallback skipped (disabled or key missing).")
+                    return f"Sorry, the primary AI service timed out after {api_timeouts.read} seconds, and the fallback service is not available."
+            # --- Catch Other Timeouts (Connect, Write, Pool) ---
+            except httpx.TimeoutException as e:
+                logger.error(f"[Primary Summary] Timeout error ({type(e)}) connecting/writing to OpenRouter API: {e}")
+                return "Sorry, the request to the primary AI model timed out. Please try again."
+
+    # --- Catch Request Errors and Other Exceptions ---
+    except httpx.RequestError as e: logger.error(f"[Primary Summary] Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the primary AI model service."
     except Exception as e:
-        logger.error(f"Unexpected error in generate_summary (
-        if response: logger.error(f"--> Last response status before error: {response.status_code}")
+        logger.error(f"[Primary Summary] Unexpected error in generate_summary (Outer try): {e}", exc_info=True)
+        if response: logger.error(f"--> Last OpenRouter response status before error: {response.status_code}")
         return "Sorry, an unexpected error occurred while trying to generate the summary."

 # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
@@ -387,6 +498,7 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
         if content:
             logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
             await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
+            # Call the primary summary function (which now includes the fallback)
             final_summary = await generate_summary(content, summary_type)
             if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
             else:
@@ -457,13 +569,13 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF

     context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")

-    global TELEGRAM_TOKEN, OPENROUTER_API_KEY
+    global TELEGRAM_TOKEN, OPENROUTER_API_KEY # Check primary keys needed to start task
     if not TELEGRAM_TOKEN:
         logger.critical("TG TOKEN missing!")
         try: await query.edit_message_text(text="❌ Bot config error.")
         except Exception: pass
         return
-    if not OPENROUTER_API_KEY:
+    if not OPENROUTER_API_KEY: # Check primary summarization key
         logger.error("OpenRouter key missing!")
         try: await query.edit_message_text(text="❌ AI config error.")
         except Exception: pass
@@ -531,13 +643,20 @@ async def lifespan(app: Starlette):
     logger.info("ASGI Lifespan: Shutdown complete.")

 async def health_check(request: Request) -> PlainTextResponse:
-    global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists
+    global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_fallback_enabled
+    bot_status = "Not Initialized"
     if ptb_app and ptb_app.bot:
         try:
             if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
             else: bot_status = "Initialized/Not running"
         except Exception as e: bot_status = f"Error checking status: {e}"
-
+    # Updated health check to show both models
+    return PlainTextResponse(
+        f"TG Bot Summarizer - Status: {bot_status}\n"
+        f"Primary Model: {OPENROUTER_MODEL}\n"
+        f"Fallback Model: {GEMINI_MODEL if _gemini_fallback_enabled else 'N/A (Disabled)'}\n"
+        f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}"
+    )

 async def telegram_webhook(request: Request) -> Response:
     global WEBHOOK_SECRET