fmab777 committed on
Commit
b51c818
·
verified ·
1 Parent(s): 9f0907a

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +43 -91
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Adding Gemini Fallback)
2
  import os
3
  import re
4
  import logging
@@ -50,7 +50,7 @@ except ImportError:
50
  HarmCategory = None
51
  HarmBlockThreshold = None
52
  _gemini_available = False
53
- logger.warning("google-generativeai library not found. Gemini fallback disabled.")
54
 
55
 
56
  # --- Logging Setup ---
@@ -66,6 +66,8 @@ logging.getLogger('starlette').setLevel(logging.INFO)
66
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
67
  logger = logging.getLogger(__name__)
68
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
 
 
69
 
70
  # --- Global variable for PTB app ---
71
  ptb_app: Optional[Application] = None
@@ -84,18 +86,17 @@ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
84
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
85
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
86
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
87
- GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Added Gemini Key
88
 
89
  # --- Model Configuration ---
90
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
91
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
92
- # Use latest flash model, allow override via env var
93
  GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
94
 
95
  # --- Key Checks ---
96
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
97
  if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
98
- # Gemini is a fallback, so only warn if key is missing and library is available
99
  if _gemini_available and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback summarization via Gemini disabled.")
100
 
101
  if not URLTOTEXT_API_KEY: pass
@@ -105,23 +106,22 @@ if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found.
105
 
106
  logger.info("Secret loading and configuration check finished.")
107
  logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
108
- if _gemini_available and GEMINI_API_KEY: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
 
109
  else: logger.info("Gemini Fallback: Disabled (library or API key missing)")
110
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
111
-
112
  _apify_token_exists = bool(APIFY_API_TOKEN)
113
- _gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY)
114
 
115
- # --- Configure Gemini Client (Do this once globally if possible) ---
 
116
  if _gemini_fallback_enabled:
117
  try:
118
  genai.configure(api_key=GEMINI_API_KEY)
119
  logger.info("Google GenAI client configured successfully.")
120
  except Exception as e:
121
  logger.error(f"Failed to configure Google GenAI client: {e}")
122
- _gemini_fallback_enabled = False # Disable fallback if config fails
123
 
124
- # (Retry Decorator, Helper Functions remain the same)
125
  # --- Retry Decorator ---
126
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
127
  async def retry_bot_operation(func, *args, **kwargs):
@@ -145,7 +145,6 @@ def extract_youtube_id(url):
145
 
146
 
147
  # --- Content Fetching Functions ---
148
- # (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
149
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
150
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
151
  try:
@@ -235,8 +234,12 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
235
  elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
236
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
237
  elif response.status_code == 404:
238
- error_info = ""; try: error_info = response.json().get("error", {}).get("message", "") except Exception: pass
239
- logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}"); return None
 
 
 
 
240
  else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
241
 
242
  except httpx.TimeoutException as e: logger.error(f"[Apify SyncItems] Timeout during API interaction for {video_url}: {e}"); return None
@@ -244,6 +247,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
244
  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
245
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
246
 
 
247
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
248
  global SUPADATA_API_KEY, APIFY_API_TOKEN
249
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
@@ -326,76 +330,38 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
326
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
327
 
328
  # --- Summarization Functions ---
329
-
330
- # --- NEW: Gemini Summarization Function ---
331
  async def generate_summary_gemini(text: str, summary_type: str) -> str:
332
  """Generates summary using Google Gemini API (Fallback)."""
333
  global GEMINI_MODEL, _gemini_fallback_enabled
334
- if not _gemini_fallback_enabled:
335
- logger.error("[Gemini Fallback] Called but is disabled (no library or key).")
336
- return "Error: Fallback AI service not available."
337
-
338
  logger.info(f"[Gemini Fallback] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
339
 
340
- # Reuse the same prompts as OpenRouter/DeepSeek for consistency
341
  if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "β€’ Clear and simple language suitable for someone unfamiliar with the topic.\n" "β€’ Uses British English spellings throughout.\n" "β€’ Straightforward and understandable vocabulary; avoid complex terms.\n" "β€’ Presented as ONE SINGLE PARAGRAPH.\n" "β€’ No more than 85 words maximum; but does not have to be exactly 85.\n" "β€’ Considers the entire text content equally.\n" "β€’ Uses semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
342
  else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "β€’ For each distinct topic or section identified in the text, create a heading.\n" "β€’ Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "β€’ Immediately following each heading, list the key points as a bulleted list.\n" "β€’ Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "β€’ The text within each bullet point should NOT contain any bold formatting.\n" "β€’ Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "β€’ Use British English spellings throughout.\n" "β€’ Avoid overly complex or advanced vocabulary.\n" "β€’ Keep bullet points concise.\n" "β€’ Ensure the entire summary takes no more than two minutes to read.\n" "β€’ Consider the entire text's content, not just the beginning or a few topics.\n" "β€’ Use semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
343
-
344
- # Gemini handles large contexts well, but basic truncation is still wise
345
- MAX_INPUT_LENGTH = 1000000 # Gemini Flash limit is ~1M tokens
346
- if len(text) > MAX_INPUT_LENGTH:
347
- logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds assumed limit ({MAX_INPUT_LENGTH}). Truncating.")
348
- text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
349
  full_prompt = f"{prompt}\n\n{text}"
350
-
351
- # Configure safety settings - block potentially harmful content
352
- safety_settings = {
353
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
354
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
355
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
356
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
357
- }
358
 
359
  try:
360
  logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
361
  model = genai.GenerativeModel(GEMINI_MODEL)
362
  logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
363
- # Use generate_content_async for non-blocking call
364
- response = await model.generate_content_async(
365
- full_prompt,
366
- safety_settings=safety_settings
367
- )
368
  logger.info("[Gemini Fallback] Received response from Gemini.")
369
 
370
- # Check for blocks and extract text
371
- if response.prompt_feedback.block_reason:
372
- logger.error(f"[Gemini Fallback] Request blocked by Gemini. Reason: {response.prompt_feedback.block_reason}")
373
- return f"Sorry, the content could not be summarized by the fallback AI due to safety filters ({response.prompt_feedback.block_reason})."
374
-
375
  summary = response.text
376
- if summary:
377
- logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}")
378
- # Apply basic markdown escaping if needed (same as OpenRouter func)
379
- summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
380
- return summary.strip()
381
- else:
382
- logger.warning(f"[Gemini Fallback] Gemini returned an empty summary. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}")
383
- return "Sorry, the fallback AI model returned an empty summary."
384
 
385
- except Exception as e:
386
- # Catch potential API errors (e.g., google.api_core.exceptions) or others
387
- logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True)
388
- return "Sorry, an unexpected error occurred while using the fallback AI service."
389
-
390
-
391
- # --- MODIFIED: Primary Summarization Function (Adds Fallback Logic) ---
392
  async def generate_summary(text: str, summary_type: str) -> str:
393
  """Generates summary using OpenRouter (Primary) with Gemini fallback on ReadTimeout."""
394
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
395
  logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
396
  if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."
397
 
398
- # Prompts (same as before)
399
  if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "β€’ Clear and simple language suitable for someone unfamiliar with the topic.\n" "β€’ Uses British English spellings throughout.\n" "β€’ Straightforward and understandable vocabulary; avoid complex terms.\n" "β€’ Presented as ONE SINGLE PARAGRAPH.\n" "β€’ No more than 85 words maximum; but does not have to be exactly 85.\n" "β€’ Considers the entire text content equally.\n" "β€’ Uses semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
400
  else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "β€’ For each distinct topic or section identified in the text, create a heading.\n" "β€’ Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "β€’ Immediately following each heading, list the key points as a bulleted list.\n" "β€’ Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "β€’ The text within each bullet point should NOT contain any bold formatting.\n" "β€’ Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "β€’ Use British English spellings throughout.\n" "β€’ Avoid overly complex or advanced vocabulary.\n" "β€’ Keep bullet points concise.\n" "β€’ Ensure the entire summary takes no more than two minutes to read.\n" "β€’ Consider the entire text's content, not just the beginning or a few topics.\n" "β€’ Use semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
401
  MAX_INPUT_LENGTH = 500000
@@ -405,7 +371,6 @@ async def generate_summary(text: str, summary_type: str) -> str:
405
  api_timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0); response = None
406
 
407
  try:
408
- # --- Attempt OpenRouter Request ---
409
  async with httpx.AsyncClient(timeout=api_timeouts) as client:
410
  logger.info(f"[Primary Summary] Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
411
  try:
@@ -413,9 +378,8 @@ async def generate_summary(text: str, summary_type: str) -> str:
413
  if response: logger.info(f"[Primary Summary] Received response from OpenRouter. Status code: {response.status_code}")
414
  else: logger.error("[Primary Summary] No response from OpenRouter (unexpected)."); return "Sorry, primary AI service failed unexpectedly."
415
 
416
- # --- Process OpenRouter Response ---
417
  if response.status_code == 200:
418
- try: # Parsing successful response
419
  data = response.json()
420
  if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
421
  message = data["choices"][0].get("message")
@@ -427,38 +391,33 @@ async def generate_summary(text: str, summary_type: str) -> str:
427
  else: logger.error(f"[Primary Summary] Unexpected choices structure: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse primary AI response (choices)."
428
  except json.JSONDecodeError: logger.error(f"[Primary Summary] Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand primary AI response."
429
  except Exception as e: logger.error(f"[Primary Summary] Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing primary AI response."
430
-
431
- # --- Handle OpenRouter Error Status Codes ---
432
  elif response.status_code == 401: logger.error("[Primary Summary] OpenRouter API key invalid (401)."); return "Error: Primary AI model configuration key is invalid."
433
  elif response.status_code == 402: logger.error("[Primary Summary] OpenRouter Payment Required (402)."); return "Sorry, primary AI service limits/payment issue."
434
  elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
435
  elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
436
- else: # Other non-200 errors
437
- error_info = ""; try: error_info = response.json().get("error", {}).get("message", "") except Exception: pass
 
 
 
 
 
438
  logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
439
  return f"Sorry, primary AI service returned unexpected status ({response.status_code})."
440
 
441
- # --- Catch OpenRouter Read Timeout -> Trigger Fallback ---
442
  except httpx.ReadTimeout:
443
  logger.warning(f"[Primary Summary] Read Timeout ({api_timeouts.read}s) waiting for OpenRouter. Attempting Gemini fallback...")
444
- if _gemini_fallback_enabled:
445
- # Call the Gemini function
446
- return await generate_summary_gemini(text, summary_type)
447
- else:
448
- logger.error("[Fallback Attempt] Gemini fallback skipped (disabled or key missing).")
449
- return f"Sorry, the primary AI service timed out after {api_timeouts.read} seconds, and the fallback service is not available."
450
- # --- Catch Other Timeouts (Connect, Write, Pool) ---
451
- except httpx.TimeoutException as e:
452
- logger.error(f"[Primary Summary] Timeout error ({type(e)}) connecting/writing to OpenRouter API: {e}")
453
- return "Sorry, the request to the primary AI model timed out. Please try again."
454
-
455
- # --- Catch Request Errors and Other Exceptions ---
456
  except httpx.RequestError as e: logger.error(f"[Primary Summary] Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the primary AI model service."
457
  except Exception as e:
458
  logger.error(f"[Primary Summary] Unexpected error in generate_summary (Outer try): {e}", exc_info=True)
459
  if response: logger.error(f"--> Last OpenRouter response status before error: {response.status_code}")
460
  return "Sorry, an unexpected error occurred while trying to generate the summary."
461
 
 
462
  # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
463
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
464
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
@@ -498,7 +457,6 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
498
  if content:
499
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
500
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
501
- # Call the primary summary function (which now includes the fallback)
502
  final_summary = await generate_summary(content, summary_type)
503
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
504
  else:
@@ -569,13 +527,13 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
569
 
570
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
571
 
572
- global TELEGRAM_TOKEN, OPENROUTER_API_KEY # Check primary keys needed to start task
573
  if not TELEGRAM_TOKEN:
574
  logger.critical("TG TOKEN missing!")
575
  try: await query.edit_message_text(text="❌ Bot config error.")
576
  except Exception: pass
577
  return
578
- if not OPENROUTER_API_KEY: # Check primary summarization key
579
  logger.error("OpenRouter key missing!")
580
  try: await query.edit_message_text(text="❌ AI config error.")
581
  except Exception: pass
@@ -650,13 +608,7 @@ async def health_check(request: Request) -> PlainTextResponse:
650
  if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
651
  else: bot_status = "Initialized/Not running"
652
  except Exception as e: bot_status = f"Error checking status: {e}"
653
- # Updated health check to show both models
654
- return PlainTextResponse(
655
- f"TG Bot Summarizer - Status: {bot_status}\n"
656
- f"Primary Model: {OPENROUTER_MODEL}\n"
657
- f"Fallback Model: {GEMINI_MODEL if _gemini_fallback_enabled else 'N/A (Disabled)'}\n"
658
- f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}"
659
- )
660
 
661
  async def telegram_webhook(request: Request) -> Response:
662
  global WEBHOOK_SECRET
 
1
+ # main.py (Correcting SyntaxError at line 238 & Updating Gemini Model)
2
  import os
3
  import re
4
  import logging
 
50
  HarmCategory = None
51
  HarmBlockThreshold = None
52
  _gemini_available = False
53
+ # logger will be defined later, log warning after logger setup
54
 
55
 
56
  # --- Logging Setup ---
 
66
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
67
  logger = logging.getLogger(__name__)
68
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
69
+ if not _gemini_available: logger.warning("google-generativeai library not found. Gemini fallback disabled.")
70
+
71
 
72
  # --- Global variable for PTB app ---
73
  ptb_app: Optional[Application] = None
 
86
  SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
87
  APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
88
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
89
+ GEMINI_API_KEY = get_secret('GEMINI_API_KEY')
90
 
91
  # --- Model Configuration ---
92
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
93
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
94
+ # *** FIX: Update default Gemini model ***
95
  GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
96
 
97
  # --- Key Checks ---
98
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
99
  if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
 
100
  if _gemini_available and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback summarization via Gemini disabled.")
101
 
102
  if not URLTOTEXT_API_KEY: pass
 
106
 
107
  logger.info("Secret loading and configuration check finished.")
108
  logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
109
+ _gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY) # Define after checking key
110
+ if _gemini_fallback_enabled: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
111
  else: logger.info("Gemini Fallback: Disabled (library or API key missing)")
112
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
 
113
  _apify_token_exists = bool(APIFY_API_TOKEN)
 
114
 
115
+
116
+ # --- Configure Gemini Client ---
117
  if _gemini_fallback_enabled:
118
  try:
119
  genai.configure(api_key=GEMINI_API_KEY)
120
  logger.info("Google GenAI client configured successfully.")
121
  except Exception as e:
122
  logger.error(f"Failed to configure Google GenAI client: {e}")
123
+ _gemini_fallback_enabled = False
124
 
 
125
  # --- Retry Decorator ---
126
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
127
  async def retry_bot_operation(func, *args, **kwargs):
 
145
 
146
 
147
  # --- Content Fetching Functions ---
 
148
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
149
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
150
  try:
 
234
  elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
235
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
236
  elif response.status_code == 404:
237
+ error_info = "";
238
+ try: # Correctly indented try/except
239
+ error_info = response.json().get("error", {}).get("message", "")
240
+ except Exception: pass
241
+ logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
242
+ return None
243
  else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
244
 
245
  except httpx.TimeoutException as e: logger.error(f"[Apify SyncItems] Timeout during API interaction for {video_url}: {e}"); return None
 
247
  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
248
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
249
 
250
+
251
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
252
  global SUPADATA_API_KEY, APIFY_API_TOKEN
253
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
 
330
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
331
 
332
  # --- Summarization Functions ---
 
 
333
  async def generate_summary_gemini(text: str, summary_type: str) -> str:
334
  """Generates summary using Google Gemini API (Fallback)."""
335
  global GEMINI_MODEL, _gemini_fallback_enabled
336
+ if not _gemini_fallback_enabled: logger.error("[Gemini Fallback] Called but is disabled."); return "Error: Fallback AI service not available."
 
 
 
337
  logger.info(f"[Gemini Fallback] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
338
 
 
339
  if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "β€’ Clear and simple language suitable for someone unfamiliar with the topic.\n" "β€’ Uses British English spellings throughout.\n" "β€’ Straightforward and understandable vocabulary; avoid complex terms.\n" "β€’ Presented as ONE SINGLE PARAGRAPH.\n" "β€’ No more than 85 words maximum; but does not have to be exactly 85.\n" "β€’ Considers the entire text content equally.\n" "β€’ Uses semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
340
  else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "β€’ For each distinct topic or section identified in the text, create a heading.\n" "β€’ Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "β€’ Immediately following each heading, list the key points as a bulleted list.\n" "β€’ Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "β€’ The text within each bullet point should NOT contain any bold formatting.\n" "β€’ Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "β€’ Use British English spellings throughout.\n" "β€’ Avoid overly complex or advanced vocabulary.\n" "β€’ Keep bullet points concise.\n" "β€’ Ensure the entire summary takes no more than two minutes to read.\n" "β€’ Consider the entire text's content, not just the beginning or a few topics.\n" "β€’ Use semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
341
+ MAX_INPUT_LENGTH = 1000000
342
+ if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
 
 
 
 
343
  full_prompt = f"{prompt}\n\n{text}"
344
+ safety_settings = { HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, }
 
 
 
 
 
 
 
345
 
346
  try:
347
  logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
348
  model = genai.GenerativeModel(GEMINI_MODEL)
349
  logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
350
+ response = await model.generate_content_async( full_prompt, safety_settings=safety_settings )
 
 
 
 
351
  logger.info("[Gemini Fallback] Received response from Gemini.")
352
 
353
+ if response.prompt_feedback.block_reason: logger.error(f"[Gemini Fallback] Request blocked. Reason: {response.prompt_feedback.block_reason}"); return f"Sorry, fallback AI blocked summary due to safety filters ({response.prompt_feedback.block_reason})."
 
 
 
 
354
  summary = response.text
355
+ if summary: logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}"); return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
356
+ else: logger.warning(f"[Gemini Fallback] Gemini returned empty summary. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}"); return "Sorry, the fallback AI model returned an empty summary."
357
+ except Exception as e: logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True); return "Sorry, an unexpected error occurred while using the fallback AI service."
 
 
 
 
 
358
 
 
 
 
 
 
 
 
359
  async def generate_summary(text: str, summary_type: str) -> str:
360
  """Generates summary using OpenRouter (Primary) with Gemini fallback on ReadTimeout."""
361
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
362
  logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
363
  if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."
364
 
 
365
  if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "β€’ Clear and simple language suitable for someone unfamiliar with the topic.\n" "β€’ Uses British English spellings throughout.\n" "β€’ Straightforward and understandable vocabulary; avoid complex terms.\n" "β€’ Presented as ONE SINGLE PARAGRAPH.\n" "β€’ No more than 85 words maximum; but does not have to be exactly 85.\n" "β€’ Considers the entire text content equally.\n" "β€’ Uses semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
366
  else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "β€’ For each distinct topic or section identified in the text, create a heading.\n" "β€’ Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "β€’ Immediately following each heading, list the key points as a bulleted list.\n" "β€’ Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "β€’ The text within each bullet point should NOT contain any bold formatting.\n" "β€’ Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "β€’ Use British English spellings throughout.\n" "β€’ Avoid overly complex or advanced vocabulary.\n" "β€’ Keep bullet points concise.\n" "β€’ Ensure the entire summary takes no more than two minutes to read.\n" "β€’ Consider the entire text's content, not just the beginning or a few topics.\n" "β€’ Use semicolons (;) instead of em dashes (– or β€”).\n\n" "Here is the text to summarise:")
367
  MAX_INPUT_LENGTH = 500000
 
371
  api_timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0); response = None
372
 
373
  try:
 
374
  async with httpx.AsyncClient(timeout=api_timeouts) as client:
375
  logger.info(f"[Primary Summary] Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
376
  try:
 
378
  if response: logger.info(f"[Primary Summary] Received response from OpenRouter. Status code: {response.status_code}")
379
  else: logger.error("[Primary Summary] No response from OpenRouter (unexpected)."); return "Sorry, primary AI service failed unexpectedly."
380
 
 
381
  if response.status_code == 200:
382
+ try:
383
  data = response.json()
384
  if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
385
  message = data["choices"][0].get("message")
 
391
  else: logger.error(f"[Primary Summary] Unexpected choices structure: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse primary AI response (choices)."
392
  except json.JSONDecodeError: logger.error(f"[Primary Summary] Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand primary AI response."
393
  except Exception as e: logger.error(f"[Primary Summary] Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing primary AI response."
 
 
394
  elif response.status_code == 401: logger.error("[Primary Summary] OpenRouter API key invalid (401)."); return "Error: Primary AI model configuration key is invalid."
395
  elif response.status_code == 402: logger.error("[Primary Summary] OpenRouter Payment Required (402)."); return "Sorry, primary AI service limits/payment issue."
396
  elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
397
  elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
398
+ else:
399
+ # *** FIX: Correct Indentation for error info extraction ***
400
+ error_info = ""
401
+ try:
402
+ error_info = response.json().get("error", {}).get("message", "")
403
+ except Exception:
404
+ pass
405
  logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
406
  return f"Sorry, primary AI service returned unexpected status ({response.status_code})."
407
 
 
408
  except httpx.ReadTimeout:
409
  logger.warning(f"[Primary Summary] Read Timeout ({api_timeouts.read}s) waiting for OpenRouter. Attempting Gemini fallback...")
410
+ if _gemini_fallback_enabled: return await generate_summary_gemini(text, summary_type)
411
+ else: logger.error("[Fallback Attempt] Gemini fallback skipped (disabled or key missing)."); return f"Sorry, the primary AI service timed out after {api_timeouts.read} seconds, and the fallback service is not available."
412
+ except httpx.TimeoutException as e: logger.error(f"[Primary Summary] Timeout error ({type(e)}) connecting/writing to OpenRouter API: {e}"); return "Sorry, the request to the primary AI model timed out. Please try again."
413
+
 
 
 
 
 
 
 
 
414
  except httpx.RequestError as e: logger.error(f"[Primary Summary] Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the primary AI model service."
415
  except Exception as e:
416
  logger.error(f"[Primary Summary] Unexpected error in generate_summary (Outer try): {e}", exc_info=True)
417
  if response: logger.error(f"--> Last OpenRouter response status before error: {response.status_code}")
418
  return "Sorry, an unexpected error occurred while trying to generate the summary."
419
 
420
+
421
  # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
422
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
423
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
 
457
  if content:
458
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
459
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
 
460
  final_summary = await generate_summary(content, summary_type)
461
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
462
  else:
 
527
 
528
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
529
 
530
+ global TELEGRAM_TOKEN, OPENROUTER_API_KEY
531
  if not TELEGRAM_TOKEN:
532
  logger.critical("TG TOKEN missing!")
533
  try: await query.edit_message_text(text="❌ Bot config error.")
534
  except Exception: pass
535
  return
536
+ if not OPENROUTER_API_KEY:
537
  logger.error("OpenRouter key missing!")
538
  try: await query.edit_message_text(text="❌ AI config error.")
539
  except Exception: pass
 
608
  if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
609
  else: bot_status = "Initialized/Not running"
610
  except Exception as e: bot_status = f"Error checking status: {e}"
611
+ return PlainTextResponse( f"TG Bot Summarizer - Status: {bot_status}\n" f"Primary Model: {OPENROUTER_MODEL}\n" f"Fallback Model: {GEMINI_MODEL if _gemini_fallback_enabled else 'N/A (Disabled)'}\n" f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}" )
 
 
 
 
 
 
612
 
613
  async def telegram_webhook(request: Request) -> Response:
614
  global WEBHOOK_SECRET