fmab777 committed on
Commit
eeeef0c
·
verified ·
1 Parent(s): 61953fa

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +177 -58
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Correcting SyntaxError at line 201)
2
  import os
3
  import re
4
  import logging
@@ -40,6 +40,19 @@ try:
40
  except ImportError:
41
  DEFAULT_PARSER = 'html.parser'
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # --- Logging Setup ---
44
  logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
45
  logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -49,6 +62,8 @@ logging.getLogger("urllib3").setLevel(logging.INFO)
49
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
50
  logging.getLogger('uvicorn').setLevel(logging.INFO)
51
  logging.getLogger('starlette').setLevel(logging.INFO)
 
 
52
  logger = logging.getLogger(__name__)
53
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
54
 
@@ -69,12 +84,19 @@ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
69
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
70
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
71
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
 
72
 
 
73
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
74
- APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Using ~ as per last attempt
 
 
75
 
 
76
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
77
- if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
 
 
78
 
79
  if not URLTOTEXT_API_KEY: pass
80
  if not SUPADATA_API_KEY: pass
@@ -82,11 +104,24 @@ if not APIFY_API_TOKEN: pass
82
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
83
 
84
  logger.info("Secret loading and configuration check finished.")
85
- logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
 
 
86
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
87
 
88
  _apify_token_exists = bool(APIFY_API_TOKEN)
 
89
 
 
 
 
 
 
 
 
 
 
 
90
  # --- Retry Decorator ---
91
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
92
  async def retry_bot_operation(func, *args, **kwargs):
@@ -108,7 +143,9 @@ def extract_youtube_id(url):
108
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
109
  else: logger.warning(f"Could not extract YT ID from {url}"); return None
110
 
 
111
  # --- Content Fetching Functions ---
 
112
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
113
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
114
  try:
@@ -198,14 +235,8 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
198
  elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
199
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
200
  elif response.status_code == 404:
201
- # *** FIX: Correct indentation for error info extraction ***
202
- error_info = ""
203
- try:
204
- error_info = response.json().get("error", {}).get("message", "")
205
- except Exception:
206
- pass
207
- logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
208
- return None
209
  else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
210
 
211
  except httpx.TimeoutException as e: logger.error(f"[Apify SyncItems] Timeout during API interaction for {video_url}: {e}"); return None
@@ -213,8 +244,6 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
213
  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
214
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
215
 
216
-
217
- # (get_youtube_transcript, get_website_content, get_website_content_via_api remain the same)
218
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
219
  global SUPADATA_API_KEY, APIFY_API_TOKEN
220
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
@@ -296,56 +325,138 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
296
  except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
297
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
298
 
299
- # --- Summarization Function ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  async def generate_summary(text: str, summary_type: str) -> str:
301
- global OPENROUTER_API_KEY, OPENROUTER_MODEL
302
- logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
303
- if not OPENROUTER_API_KEY: logger.error("OpenRouter key missing for generate_summary."); return "Error: AI model configuration key missing."
 
 
 
304
  if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "β€’ Clear and simple language suitable for someone unfamiliar with the topic.\n" "β€’ Uses British English spellings throughout.\n" "β€’ Straightforward and understandable vocabulary; avoid complex terms.\n" "β€’ Presented as ONE SINGLE PARAGRAPH.\n" "β€’ No more than 85 words maximum; but does not have to be exactly 85.\n" "β€’ Considers the entire text content equally.\n" "β€’ Uses semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
305
  else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "β€’ For each distinct topic or section identified in the text, create a heading.\n" "β€’ Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "β€’ Immediately following each heading, list the key points as a bulleted list.\n" "β€’ Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "β€’ The text within each bullet point should NOT contain any bold formatting.\n" "β€’ Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "β€’ Use British English spellings throughout.\n" "β€’ Avoid overly complex or advanced vocabulary.\n" "β€’ Keep bullet points concise.\n" "β€’ Ensure the entire summary takes no more than two minutes to read.\n" "β€’ Consider the entire text's content, not just the beginning or a few topics.\n" "β€’ Use semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
306
  MAX_INPUT_LENGTH = 500000
307
- if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
308
  full_prompt = f"{prompt}\n\n{text}"
309
  headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
310
  api_timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0); response = None
 
311
  try:
 
312
  async with httpx.AsyncClient(timeout=api_timeouts) as client:
313
- logger.info(f"Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
314
- response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
315
- if response: logger.info(f"Received response from OpenRouter. Status code: {response.status_code}")
316
- else: logger.error("No response received from OpenRouter after await completed (unexpected)."); return "Sorry, communication with the AI service failed unexpectedly."
317
- if response.status_code == 200:
318
- try:
319
- data = response.json()
320
- if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
321
- message = data["choices"][0].get("message")
322
- if message and isinstance(message, dict):
323
- summary = message.get("content")
324
- if summary: logger.info(f"Success generating summary via OpenRouter. Output len: {len(summary)}"); return summary.strip()
325
- else: logger.warning(f"OpenRouter success but content empty. Resp: {data}"); return "Sorry, the AI model returned an empty summary."
326
- else: logger.error(f"Unexpected message structure in OpenRouter resp: {message}. Full: {data}"); return "Sorry, could not parse AI response (format)."
327
- else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
328
- except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
329
- except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
330
- elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
331
- elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
332
- elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
333
- elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
334
- else:
335
- # Corrected Indentation for error info extraction
336
- error_info = ""
337
- try:
338
- error_info = response.json().get("error", {}).get("message", "")
339
- except Exception:
340
- pass
341
- logger.error(f"Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
342
- return f"Sorry, AI service returned unexpected status ({response.status_code})."
343
- except httpx.ReadTimeout: logger.error(f"Read Timeout error ({api_timeouts.read}s) waiting for OpenRouter API response."); return f"Sorry, the request to the AI model timed out after {api_timeouts.read} seconds while waiting for a response. The content might be too long or the service busy. Please try again later or with shorter content."
344
- except httpx.TimeoutException as e: logger.error(f"Timeout error ({type(e)}) connecting to/writing to OpenRouter API: {e}"); return "Sorry, the request to the AI model timed out. Please try again."
345
- except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the AI model service."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  except Exception as e:
347
- logger.error(f"Unexpected error in generate_summary (OpenRouter request phase): {e}", exc_info=True)
348
- if response: logger.error(f"--> Last response status before error: {response.status_code}")
349
  return "Sorry, an unexpected error occurred while trying to generate the summary."
350
 
351
  # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
@@ -387,6 +498,7 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
387
  if content:
388
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
389
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
 
390
  final_summary = await generate_summary(content, summary_type)
391
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
392
  else:
@@ -457,13 +569,13 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
457
 
458
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
459
 
460
- global TELEGRAM_TOKEN, OPENROUTER_API_KEY
461
  if not TELEGRAM_TOKEN:
462
  logger.critical("TG TOKEN missing!")
463
  try: await query.edit_message_text(text="❌ Bot config error.")
464
  except Exception: pass
465
  return
466
- if not OPENROUTER_API_KEY:
467
  logger.error("OpenRouter key missing!")
468
  try: await query.edit_message_text(text="❌ AI config error.")
469
  except Exception: pass
@@ -531,13 +643,20 @@ async def lifespan(app: Starlette):
531
  logger.info("ASGI Lifespan: Shutdown complete.")
532
 
533
  async def health_check(request: Request) -> PlainTextResponse:
534
- global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists; bot_status = "Not Initialized"
 
535
  if ptb_app and ptb_app.bot:
536
  try:
537
  if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
538
  else: bot_status = "Initialized/Not running"
539
  except Exception as e: bot_status = f"Error checking status: {e}"
540
- return PlainTextResponse(f"TG Bot Summarizer - Status: {bot_status}\nModel: {OPENROUTER_MODEL}\nApify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}")
 
 
 
 
 
 
541
 
542
  async def telegram_webhook(request: Request) -> Response:
543
  global WEBHOOK_SECRET
 
1
+ # main.py (Adding Gemini Fallback)
2
  import os
3
  import re
4
  import logging
 
40
  except ImportError:
41
  DEFAULT_PARSER = 'html.parser'
42
 
43
# --- Google Gemini (optional dependency) ---
# The Gemini SDK is optional. When it cannot be imported, the three names below
# are bound to None and `_gemini_available` is False so later code can test for
# the feature instead of failing on a missing module.
try:
    import google.generativeai as genai
    from google.generativeai.types import HarmCategory, HarmBlockThreshold
    _gemini_available = True
except ImportError:
    genai = None
    HarmCategory = None
    HarmBlockThreshold = None
    _gemini_available = False
    # The module-level `logger` is only assigned after the logging setup that
    # follows this block, so referring to it here would raise NameError and
    # abort start-up. Ask the logging module for the logger directly instead.
    logging.getLogger(__name__).warning("google-generativeai library not found. Gemini fallback disabled.")
54
+
55
+
56
  # --- Logging Setup ---
57
  logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO )
58
  logging.getLogger("httpx").setLevel(logging.WARNING)
 
62
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
63
  logging.getLogger('uvicorn').setLevel(logging.INFO)
64
  logging.getLogger('starlette').setLevel(logging.INFO)
65
+ # Reduce Gemini log noise if needed
66
+ if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
67
  logger = logging.getLogger(__name__)
68
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
69
 
 
84
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
85
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
86
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
87
+ GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Added Gemini Key
88
 
89
+ # --- Model Configuration ---
90
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
91
+ APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
92
+ # Use latest flash model, allow override via env var
93
+ GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
94
 
95
+ # --- Key Checks ---
96
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
97
+ if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
98
+ # Gemini is a fallback, so only warn if key is missing and library is available
99
+ if _gemini_available and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback summarization via Gemini disabled.")
100
 
101
  if not URLTOTEXT_API_KEY: pass
102
  if not SUPADATA_API_KEY: pass
 
104
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
105
 
106
  logger.info("Secret loading and configuration check finished.")
107
+ logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
108
+ if _gemini_available and GEMINI_API_KEY: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
109
+ else: logger.info("Gemini Fallback: Disabled (library or API key missing)")
110
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
111
 
112
  _apify_token_exists = bool(APIFY_API_TOKEN)
113
+ _gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY)
114
 
115
+ # --- Configure Gemini Client (Do this once globally if possible) ---
116
+ if _gemini_fallback_enabled:
117
+ try:
118
+ genai.configure(api_key=GEMINI_API_KEY)
119
+ logger.info("Google GenAI client configured successfully.")
120
+ except Exception as e:
121
+ logger.error(f"Failed to configure Google GenAI client: {e}")
122
+ _gemini_fallback_enabled = False # Disable fallback if config fails
123
+
124
+ # (Retry Decorator, Helper Functions remain the same)
125
  # --- Retry Decorator ---
126
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
127
  async def retry_bot_operation(func, *args, **kwargs):
 
143
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
144
  else: logger.warning(f"Could not extract YT ID from {url}"); return None
145
 
146
+
147
  # --- Content Fetching Functions ---
148
+ # (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
149
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
150
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
151
  try:
 
235
  elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
236
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
237
  elif response.status_code == 404:
238
+ error_info = ""; try: error_info = response.json().get("error", {}).get("message", "") except Exception: pass
239
+ logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}"); return None
 
 
 
 
 
 
240
  else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
241
 
242
  except httpx.TimeoutException as e: logger.error(f"[Apify SyncItems] Timeout during API interaction for {video_url}: {e}"); return None
 
244
  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
245
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
246
 
 
 
247
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
248
  global SUPADATA_API_KEY, APIFY_API_TOKEN
249
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
 
325
  except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
326
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
327
 
328
+ # --- Summarization Functions ---
329
+
330
+ # --- NEW: Gemini Summarization Function ---
331
async def generate_summary_gemini(text: str, summary_type: str) -> str:
    """Generate a summary of *text* with the Google Gemini API (fallback path).

    Args:
        text: The content to summarise. Inputs longer than ``MAX_INPUT_LENGTH``
            characters are truncated before being sent.
        summary_type: ``"paragraph"`` selects the single-paragraph prompt; any
            other value selects the headed bullet-point prompt.

    Returns:
        The summary with ``_``, ``*``, ``[`` and the backtick backslash-escaped,
        or a user-facing message beginning with ``"Error:"`` or ``"Sorry,"``
        when the fallback is disabled, the request is blocked, the model
        returns no text, or the API call fails. This function does not raise.
    """
    if not _gemini_fallback_enabled:
        logger.error("[Gemini Fallback] Called but is disabled (no library or key).")
        return "Error: Fallback AI service not available."

    logger.info(f"[Gemini Fallback] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")

    # Same instructions as the primary (OpenRouter) summariser, for consistency.
    if summary_type == "paragraph":
        prompt = (
            "You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
            "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
            "• Uses British English spellings throughout.\n"
            "• Straightforward and understandable vocabulary; avoid complex terms.\n"
            "• Presented as ONE SINGLE PARAGRAPH.\n"
            "• No more than 85 words maximum; but does not have to be exactly 85.\n"
            "• Considers the entire text content equally.\n"
            "• Uses semicolons (;) instead of em dashes (– or —).\n\n"
            "Here is the text to summarise:"
        )
    else:
        prompt = (
            "You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n"
            "• For each distinct topic or section identified in the text, create a heading.\n"
            "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n"
            "• Immediately following each heading, list the key points as a bulleted list.\n"
            "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n"
            "• The text within each bullet point should NOT contain any bold formatting.\n"
            "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n"
            "• Use British English spellings throughout.\n"
            "• Avoid overly complex or advanced vocabulary.\n"
            "• Keep bullet points concise.\n"
            "• Ensure the entire summary takes no more than two minutes to read.\n"
            "• Consider the entire text's content, not just the beginning or a few topics.\n"
            "• Use semicolons (;) instead of em dashes (– or —).\n\n"
            "Here is the text to summarise:"
        )

    # Defensive cap measured in characters; the model's own context limit is
    # expressed in tokens, so this is only a coarse upper bound.
    MAX_INPUT_LENGTH = 1000000
    if len(text) > MAX_INPUT_LENGTH:
        logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds assumed limit ({MAX_INPUT_LENGTH}). Truncating.")
        text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
    full_prompt = f"{prompt}\n\n{text}"

    # Block content rated medium or above in each harm category.
    safety_settings = {
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    }

    try:
        logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
        model = genai.GenerativeModel(GEMINI_MODEL)
        logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
        # Async variant so the event loop is not blocked while waiting.
        response = await model.generate_content_async(
            full_prompt,
            safety_settings=safety_settings,
        )
        logger.info("[Gemini Fallback] Received response from Gemini.")

        # The prompt itself may have been rejected before any generation.
        block_reason = response.prompt_feedback.block_reason
        if block_reason:
            logger.error(f"[Gemini Fallback] Request blocked by Gemini. Reason: {block_reason}")
            return f"Sorry, the content could not be summarized by the fallback AI due to safety filters ({block_reason})."

        # `response.text` raises ValueError when no candidate carries text
        # (for example when generation stopped for a safety reason). Treat
        # that the same as an empty summary rather than an unexpected error.
        try:
            summary = response.text
        except ValueError:
            summary = ""

        if not summary:
            finish_reason = response.candidates[0].finish_reason if response.candidates else 'N/A'
            logger.warning(f"[Gemini Fallback] Gemini returned an empty summary. Finish reason: {finish_reason}")
            return "Sorry, the fallback AI model returned an empty summary."

        logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}")
        # Backslash-escape basic Markdown control characters in the model output.
        summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
        return summary.strip()

    except Exception as e:
        # Boundary handler: API/client errors are reported to the user as text.
        logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True)
        return "Sorry, an unexpected error occurred while using the fallback AI service."
389
+
390
+
391
# --- Primary Summarization Function (OpenRouter, Gemini fallback on read timeout) ---
async def generate_summary(text: str, summary_type: str) -> str:
    """Generate a summary of *text* via OpenRouter, falling back to Gemini.

    The Gemini fallback is attempted only when the OpenRouter request hits a
    read timeout and ``_gemini_fallback_enabled`` is true; every other failure
    is reported directly.

    Args:
        text: The content to summarise. Inputs longer than ``MAX_INPUT_LENGTH``
            characters are truncated before being sent.
        summary_type: ``"paragraph"`` selects the single-paragraph prompt; any
            other value selects the headed bullet-point prompt.

    Returns:
        The summary with ``_``, ``*``, ``[`` and the backtick backslash-escaped,
        or a user-facing message beginning with ``"Error:"`` or ``"Sorry,"``
        describing what went wrong. This function does not raise.
    """
    logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
    if not OPENROUTER_API_KEY:
        logger.error("[Primary Summary] OpenRouter key missing.")
        return "Error: AI model configuration key missing."

    if summary_type == "paragraph":
        prompt = (
            "You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n"
            "• Clear and simple language suitable for someone unfamiliar with the topic.\n"
            "• Uses British English spellings throughout.\n"
            "• Straightforward and understandable vocabulary; avoid complex terms.\n"
            "• Presented as ONE SINGLE PARAGRAPH.\n"
            "• No more than 85 words maximum; but does not have to be exactly 85.\n"
            "• Considers the entire text content equally.\n"
            "• Uses semicolons (;) instead of em dashes (– or —).\n\n"
            "Here is the text to summarise:"
        )
    else:
        prompt = (
            "You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n"
            "• For each distinct topic or section identified in the text, create a heading.\n"
            "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n"
            "• Immediately following each heading, list the key points as a bulleted list.\n"
            "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n"
            "• The text within each bullet point should NOT contain any bold formatting.\n"
            "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n"
            "• Use British English spellings throughout.\n"
            "• Avoid overly complex or advanced vocabulary.\n"
            "• Keep bullet points concise.\n"
            "• Ensure the entire summary takes no more than two minutes to read.\n"
            "• Consider the entire text's content, not just the beginning or a few topics.\n"
            "• Use semicolons (;) instead of em dashes (– or —).\n\n"
            "Here is the text to summarise:"
        )

    MAX_INPUT_LENGTH = 500000  # characters
    if len(text) > MAX_INPUT_LENGTH:
        logger.warning(f"[Primary Summary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating.")
        text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
    full_prompt = f"{prompt}\n\n{text}"

    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
    payload = {"model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}]}
    openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
    # Long read timeout: the model may take a while to produce its answer.
    api_timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0)
    response = None

    try:
        async with httpx.AsyncClient(timeout=api_timeouts) as client:
            logger.info(f"[Primary Summary] Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
            try:
                # post() either returns a response object or raises.
                response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
                status = response.status_code
                logger.info(f"[Primary Summary] Received response from OpenRouter. Status code: {status}")

                if status == 200:
                    try:
                        data = response.json()
                        choices = data.get("choices")
                        if not (choices and isinstance(choices, list) and len(choices) > 0):
                            logger.error(f"[Primary Summary] Unexpected choices structure: {data.get('choices')}. Full: {data}")
                            return "Sorry, could not parse primary AI response (choices)."
                        message = choices[0].get("message")
                        if not (message and isinstance(message, dict)):
                            logger.error(f"[Primary Summary] Unexpected message structure: {message}. Full: {data}")
                            return "Sorry, could not parse primary AI response (format)."
                        summary = message.get("content")
                        if not summary:
                            logger.warning(f"[Primary Summary] OpenRouter success but content empty. Resp: {data}")
                            return "Sorry, the primary AI model returned an empty summary."
                        logger.info(f"[Primary Summary] Success via OpenRouter. Output len: {len(summary)}")
                        # Backslash-escape basic Markdown control characters.
                        return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
                    except json.JSONDecodeError:
                        logger.error(f"[Primary Summary] Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}")
                        return "Sorry, failed to understand primary AI response."
                    except Exception as e:
                        logger.error(f"[Primary Summary] Error processing OpenRouter success response: {e}", exc_info=True)
                        return "Sorry, error processing primary AI response."

                if status == 401:
                    logger.error("[Primary Summary] OpenRouter API key invalid (401).")
                    return "Error: Primary AI model configuration key is invalid."
                if status == 402:
                    logger.error("[Primary Summary] OpenRouter Payment Required (402).")
                    return "Sorry, primary AI service limits/payment issue."
                if status == 429:
                    logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429).")
                    return "Sorry, primary AI model is busy. Try again."
                if status == 500:
                    logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}")
                    return "Sorry, primary AI service internal error."

                # Any other status: pull the API's error message if the body
                # is JSON. This is best-effort only; the raw body is logged too.
                error_info = ""
                try:
                    error_info = response.json().get("error", {}).get("message", "")
                except Exception:
                    pass
                logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}")
                return f"Sorry, primary AI service returned unexpected status ({response.status_code})."

            except httpx.ReadTimeout:
                # Only a read timeout triggers the fallback provider.
                logger.warning(f"[Primary Summary] Read Timeout ({api_timeouts.read}s) waiting for OpenRouter. Attempting Gemini fallback...")
                if _gemini_fallback_enabled:
                    return await generate_summary_gemini(text, summary_type)
                logger.error("[Fallback Attempt] Gemini fallback skipped (disabled or key missing).")
                return f"Sorry, the primary AI service timed out after {api_timeouts.read} seconds, and the fallback service is not available."
            except httpx.TimeoutException as e:
                # Connect / write / pool timeouts: no fallback, ask the user to retry.
                logger.error(f"[Primary Summary] Timeout error ({type(e)}) connecting/writing to OpenRouter API: {e}")
                return "Sorry, the request to the primary AI model timed out. Please try again."

    except httpx.RequestError as e:
        logger.error(f"[Primary Summary] Request error connecting to OpenRouter API: {e}")
        return "Sorry, there was an error connecting to the primary AI model service."
    except Exception as e:
        logger.error(f"[Primary Summary] Unexpected error in generate_summary (Outer try): {e}", exc_info=True)
        if response is not None:
            logger.error(f"--> Last OpenRouter response status before error: {response.status_code}")
        return "Sorry, an unexpected error occurred while trying to generate the summary."
461
 
462
  # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
 
498
  if content:
499
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
500
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
501
+ # Call the primary summary function (which now includes the fallback)
502
  final_summary = await generate_summary(content, summary_type)
503
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
504
  else:
 
569
 
570
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
571
 
572
+ global TELEGRAM_TOKEN, OPENROUTER_API_KEY # Check primary keys needed to start task
573
  if not TELEGRAM_TOKEN:
574
  logger.critical("TG TOKEN missing!")
575
  try: await query.edit_message_text(text="❌ Bot config error.")
576
  except Exception: pass
577
  return
578
+ if not OPENROUTER_API_KEY: # Check primary summarization key
579
  logger.error("OpenRouter key missing!")
580
  try: await query.edit_message_text(text="❌ AI config error.")
581
  except Exception: pass
 
643
  logger.info("ASGI Lifespan: Shutdown complete.")
644
 
645
  async def health_check(request: Request) -> PlainTextResponse:
646
+ global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_fallback_enabled
647
+ bot_status = "Not Initialized"
648
  if ptb_app and ptb_app.bot:
649
  try:
650
  if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
651
  else: bot_status = "Initialized/Not running"
652
  except Exception as e: bot_status = f"Error checking status: {e}"
653
+ # Updated health check to show both models
654
+ return PlainTextResponse(
655
+ f"TG Bot Summarizer - Status: {bot_status}\n"
656
+ f"Primary Model: {OPENROUTER_MODEL}\n"
657
+ f"Fallback Model: {GEMINI_MODEL if _gemini_fallback_enabled else 'N/A (Disabled)'}\n"
658
+ f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}"
659
+ )
660
 
661
  async def telegram_webhook(request: Request) -> Response:
662
  global WEBHOOK_SECRET