fmab777 commited on
Commit
a59041f
Β·
verified Β·
1 Parent(s): 8b1f48a

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +182 -259
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Updated for Groq Llama 4, Gemini 2.5 Pro/Flash, DeepSeek hierarchy)
2
  import os
3
  import re
4
  import logging
@@ -7,7 +7,7 @@ import json
7
  import html
8
  import contextlib
9
  import traceback
10
- import urllib.parse # Added for URL encoding
11
  from typing import Optional, Dict, Any, Tuple
12
 
13
  # --- Frameworks ---
@@ -45,15 +45,14 @@ except ImportError:
45
  try:
46
  import google.generativeai as genai
47
  from google.generativeai.types import HarmCategory, HarmBlockThreshold
48
- _gemini_sdk_available = True # <<< CHANGE: Renamed flag
49
  except ImportError:
50
  genai = None
51
  HarmCategory = None
52
  HarmBlockThreshold = None
53
  _gemini_sdk_available = False
54
- # logger will be defined later, log warning after logger setup
55
 
56
- # --- Groq SDK --- # <<< CHANGE: Added Groq import
57
  try:
58
  from groq import Groq, GroqError
59
  _groq_sdk_available = True
@@ -72,11 +71,11 @@ logging.getLogger('gunicorn.error').setLevel(logging.INFO)
72
  logging.getLogger('uvicorn').setLevel(logging.INFO)
73
  logging.getLogger('starlette').setLevel(logging.INFO)
74
  if _gemini_sdk_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
75
- if _groq_sdk_available: logging.getLogger("groq").setLevel(logging.INFO) # <<< CHANGE: Add Groq logger config
76
  logger = logging.getLogger(__name__)
77
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
78
  if not _gemini_sdk_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.")
79
- if not _groq_sdk_available: logger.warning("groq library not found. Groq functionality disabled.") # <<< CHANGE: Log Groq status
80
 
81
 
82
  # --- Global variable for PTB app ---
@@ -91,27 +90,27 @@ def get_secret(secret_name):
91
  return value
92
 
93
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
94
- GROQ_API_KEY = get_secret('GROQ_API_KEY') # <<< CHANGE: Added Groq key
95
- GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Used for Gemini 2.5 Pro and 2.0 Flash
96
- OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # Summarizer Fallback 3
97
- URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY') # Scrape Fallback 1
98
- SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY') # YT Fallback 1
99
- APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN') # YT Fallback 2 + Scrape Fallbacks 4 & 5
100
- RAPIDAPI_KEY = get_secret('RAPIDAPI_KEY') # Scrape Fallbacks 2 & 3
101
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
102
 
103
- # <<< CHANGE: Updated Model Configurations >>>
104
  # Model Priority:
105
- # 1. Groq Llama 4
106
- # 2. Gemini 2.5 Pro
107
  # 3. Gemini 2.0 Flash
108
- # 4. OpenRouter DeepSeek
109
- GROQ_LLAMA_MODEL = os.environ.get("GROQ_LLAMA_MODEL", "llama3-70b-8192") # Default to Llama 3 70b on Groq (Llama 4 Scout not available via API yet - Fall 2024)
110
- GEMINI_PRO_MODEL = os.environ.get("GEMINI_PRO_MODEL", "gemini-1.5-pro-latest") # Gemini 2.5 Pro equivalent (as of Fall 2024)
111
- GEMINI_FLASH_MODEL = os.environ.get("GEMINI_FLASH_MODEL", "gemini-1.5-flash-latest") # Gemini 2.0 Flash equivalent (as of Fall 2024)
112
- OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat") # Fallback 3 Model (Updated Deepseek name)
113
 
114
- APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Default YT Actor
115
  APIFY_CRAWLER_ACTOR_ID = "apify/website-content-crawler" # Scrape Fallback 4
116
  APIFY_TEXT_SCRAPER_ACTOR_ID = "karamelo/text-scraper-free" # Scrape Fallback 5
117
 
@@ -120,23 +119,21 @@ if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found.");
120
 
121
  # Summarizer Availability Checks
122
  _groq_enabled = _groq_sdk_available and bool(GROQ_API_KEY)
123
- _gemini_api_enabled = _gemini_sdk_available and bool(GEMINI_API_KEY) # <<< CHANGE: Renamed flag
124
  _openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
125
 
126
  if not _groq_enabled:
127
- if not _groq_sdk_available: logger.error("❌ ERROR: groq library missing. Groq (Llama) disabled.")
128
- elif not GROQ_API_KEY: logger.error("❌ ERROR: GROQ_API_KEY not found. Primary summarization (Groq Llama) will fail.")
129
  if not _gemini_api_enabled:
130
  if not _gemini_sdk_available: logger.warning("⚠️ WARNING: google-generativeai library missing. Gemini disabled.")
131
- elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Gemini summarization fallbacks (Pro/Flash) will fail.")
132
  if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Final fallback summarization (DeepSeek) will fail.")
133
 
134
  if not _groq_enabled and not _gemini_api_enabled and not _openrouter_fallback_enabled:
135
  logger.critical("❌ FATAL: No summarization models are configured or enabled. Bot cannot function.")
136
- # Depending on deployment, might want to raise RuntimeError here
137
- # raise RuntimeError("No summarization models configured.")
138
  elif not _groq_enabled:
139
- logger.warning("⚠️ Primary summarizer (Groq Llama) is disabled. Will start with Gemini Pro.")
140
 
141
  # Scraper Availability Checks (Warnings only)
142
  if not RAPIDAPI_KEY: logger.warning("⚠️ WARNING: RAPIDAPI_KEY not found. RapidAPI scraping fallbacks (2 & 3) will be unavailable.")
@@ -146,25 +143,25 @@ if not SUPADATA_API_KEY: logger.warning("Optional secret 'SUPADATA_API_KEY' not
146
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
147
 
148
  logger.info("Secret loading and configuration check finished.")
149
- logger.info(f"Summarizer 1 (Groq): {GROQ_LLAMA_MODEL if _groq_enabled else 'DISABLED'}") # <<< CHANGE
150
- logger.info(f"Summarizer 2 (Gemini Pro): {GEMINI_PRO_MODEL if _gemini_api_enabled else 'DISABLED'}") # <<< CHANGE
151
- logger.info(f"Summarizer 3 (Gemini Flash): {GEMINI_FLASH_MODEL if _gemini_api_enabled else 'DISABLED'}") # <<< CHANGE
152
- logger.info(f"Summarizer 4 (OpenRouter): {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}") # <<< CHANGE
153
  logger.info(f"Using Apify Actor (YT Default): {APIFY_ACTOR_ID}")
154
  logger.info(f"Using Apify Actor (Web Scrape Fallback 4): {APIFY_CRAWLER_ACTOR_ID}")
155
  logger.info(f"Using Apify Actor (Web Scrape Fallback 5): {APIFY_TEXT_SCRAPER_ACTOR_ID}")
156
 
157
- # Flags for scraper key existence (unchanged naming)
158
  _apify_token_exists = bool(APIFY_API_TOKEN)
159
  _urltotext_key_exists = bool(URLTOTEXT_API_KEY)
160
  _rapidapi_key_exists = bool(RAPIDAPI_KEY)
161
 
162
  # --- Configure APIs ---
163
- if _gemini_api_enabled: # <<< CHANGE: Use renamed flag
164
  try: genai.configure(api_key=GEMINI_API_KEY); logger.info("Google GenAI client configured successfully.")
165
  except Exception as e: logger.error(f"Failed to configure Google GenAI client: {e}"); _gemini_api_enabled = False
166
 
167
- # Groq client is often initialized per-request or within the function call
168
 
169
  # --- Retry Decorator ---
170
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
@@ -187,14 +184,13 @@ def extract_youtube_id(url):
187
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
188
  else: logger.warning(f"Could not extract YT ID from {url}"); return None
189
 
190
-
191
  # --- Content Fetching Functions ---
192
  # (These functions: get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript,
193
  # fetch_url_content_for_scrape, get_website_content, get_website_content_via_api,
194
  # get_website_content_via_scrapers_proxy, get_website_content_via_ai_web_scraper,
195
  # _run_apify_actor_for_web_content, get_website_content_via_apify_crawler,
196
  # get_website_content_via_apify_text_scraper remain UNCHANGED. They are omitted here for brevity
197
- # but should be included in the final main.py file)
198
  # --- START OMITTED CONTENT FETCHING FUNCTIONS ---
199
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
200
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
@@ -228,44 +224,30 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
228
 
229
  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
230
  """Fallback YT 2: Fetches YouTube transcript using default Apify Actor."""
231
- global APIFY_ACTOR_ID # Uses the default YT actor ID
232
  if not video_url: logger.error("[Apify YT] No video_url provided"); return None
233
  if not api_token: logger.error("[Apify YT] API token missing."); return None
234
  logger.info(f"[YT Fallback 2] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
235
-
236
  sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
237
  params = {"token": api_token}
238
- payload = {
239
- "urls": [video_url],
240
- "outputFormat": "singleStringText",
241
- "maxRetries": 5,
242
- "channelHandleBoolean": False,
243
- "channelNameBoolean": False,
244
- "datePublishedBoolean": False,
245
- "relativeDateTextBoolean": False,
246
- }
247
  headers = {"Content-Type": "application/json"}
248
-
249
  try:
250
- async with httpx.AsyncClient(timeout=120.0) as client: # Long timeout for potential YT processing
251
  logger.debug(f"[Apify YT] POST Request to {sync_items_endpoint} for {video_url}")
252
  response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
253
  logger.debug(f"[Apify YT] Received status code {response.status_code} for {video_url}")
254
-
255
  if response.status_code == 200:
256
  try:
257
  results = response.json()
258
  if isinstance(results, list) and len(results) > 0:
259
- item = results[0]
260
- content = None
261
- # Check common keys for transcript text
262
  if "captions" in item and isinstance(item["captions"], str): content = item["captions"]
263
  elif "text" in item and isinstance(item["text"], str): content = item["text"]
264
  elif "transcript" in item and isinstance(item["transcript"], str): content = item["transcript"]
265
- elif "captions" in item and isinstance(item["captions"], list): # Handle list format if needed
266
  if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text"))
267
  elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"])
268
-
269
  if content and isinstance(content, str): logger.info(f"[Apify YT] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
270
  else: logger.warning(f"[Apify YT] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None
271
  else: logger.warning(f"[Apify YT] Actor success but dataset was empty for {video_url}. Response: {results}"); return None
@@ -281,11 +263,10 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
281
  except Exception as e: logger.error(f"[Apify YT] Unexpected error during Apify YT call for {video_url}: {e}", exc_info=True); return None
282
 
283
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
284
- global SUPADATA_API_KEY, APIFY_API_TOKEN, _apify_token_exists # <<< Added _apify_token_exists
285
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
286
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
287
  transcript_text = None
288
- # Method 1: youtube-transcript-api (Primary)
289
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
290
  try:
291
  transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] )
@@ -295,8 +276,6 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
295
  except NoTranscriptFound: logger.warning(f"[Primary YT] No transcript found via lib for {video_id}.")
296
  except TranscriptsDisabled: logger.warning(f"[Primary YT] Transcripts disabled via lib for {video_id}.")
297
  except Exception as e: logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}"); transcript_text = None
298
-
299
- # Method 2: Supadata (Fallback 1)
300
  if transcript_text is None:
301
  logger.info("[Fallback YT 1] Trying Supadata API...")
302
  if SUPADATA_API_KEY:
@@ -304,33 +283,26 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
304
  if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
305
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
306
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
307
-
308
- # Method 3: Apify (Fallback 2 - Default YT Actor)
309
  if transcript_text is None:
310
  logger.info("[Fallback YT 2] Trying Apify REST API (Default YT Actor)...")
311
- if _apify_token_exists: # Use the global flag
312
  transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
313
  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify Default YT Actor for {video_url}"); return transcript_text
314
  else: logger.warning(f"[Fallback YT 2] Apify Default YT Actor failed or no content for {video_url}.")
315
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
316
-
317
- # Final Result
318
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
319
  return transcript_text
320
 
321
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
322
- """Directly fetches URL content using httpx. (Primary Web Method - Fetching part)"""
323
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
324
  try:
325
  async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
326
  logger.debug(f"[Web Scrape Direct] Sending GET request to {url}")
327
  response = await client.get(url)
328
  logger.debug(f"[Web Scrape Direct] Received response {response.status_code} from {url}")
329
- response.raise_for_status() # Raise HTTPStatusError for 4xx/5xx
330
  content_type = response.headers.get('content-type', '').lower()
331
- if 'html' not in content_type:
332
- logger.warning(f"[Web Scrape Direct] Non-HTML content type received from {url}: {content_type}")
333
- return None
334
  try: return response.text
335
  except Exception as e: logger.error(f"[Web Scrape Direct] Error decoding response text for {url}: {e}"); return None
336
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape Direct] HTTP error {e.response.status_code} fetching {url}: {e}")
@@ -341,19 +313,15 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[
341
  return None
342
 
343
  async def get_website_content(url: str) -> Optional[str]:
344
- """Primary method: Fetches HTML directly and parses with BeautifulSoup."""
345
  if not url: logger.error("[Web Scrape Primary] No URL provided"); return None
346
  logger.info(f"[Web Scrape Primary] Attempting direct fetch and parse for: {url}")
347
  html_content = await fetch_url_content_for_scrape(url)
348
- if not html_content:
349
- logger.warning(f"[Web Scrape Primary] Direct fetch failed for {url}.")
350
- return None
351
  try:
352
  def parse_html(content: str) -> Optional[str]:
353
  try:
354
  soup = BeautifulSoup(content, DEFAULT_PARSER)
355
- for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "picture", "source", "map", "area"]):
356
- element.extract()
357
  main_content = soup.find('main') or soup.find('article') or soup.find(role='main') or soup.find(id=re.compile(r'content|main|body', re.I)) or soup.find(class_=re.compile(r'content|main|body|article|post', re.I))
358
  target_element = main_content if main_content else soup.body
359
  if not target_element: logger.warning(f"[Web Scrape Primary Parse] Could not find body or main content container for {url}"); return None
@@ -368,7 +336,6 @@ async def get_website_content(url: str) -> Optional[str]:
368
  except Exception as e: logger.error(f"[Web Scrape Primary] Unexpected error during parsing process for {url}: {e}", exc_info=True); return None
369
 
370
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
371
- """Fallback 1: Fetches website content using urltotext.com API."""
372
  if not url: logger.error("[Web Scrape Fallback 1] No URL"); return None
373
  if not api_key: logger.error("[Web Scrape Fallback 1] urltotext.com API key missing."); return None
374
  logger.info(f"[Web Scrape Fallback 1] Attempting fetch for: {url} using urltotext.com API")
@@ -396,7 +363,6 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
396
  except Exception as e: logger.error(f"[Web Scrape Fallback 1] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
397
 
398
  async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Optional[str]:
399
- """Fallback 2: Fetches website content using Scraper's Proxy Parser via RapidAPI."""
400
  if not url: logger.error("[Web Scrape Fallback 2] No URL provided"); return None
401
  if not api_key: logger.error("[Web Scrape Fallback 2] RapidAPI key missing."); return None
402
  logger.info(f"[Web Scrape Fallback 2] Attempting fetch for: {url} using Scraper's Proxy Parser API")
@@ -430,7 +396,6 @@ async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Opti
430
  except Exception as e: logger.error(f"[Web Scrape Fallback 2] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None
431
 
432
  async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Optional[str]:
433
- """Fallback 3: Fetches website content using AI Web Scraper via RapidAPI."""
434
  if not url: logger.error("[Web Scrape Fallback 3] No URL provided"); return None
435
  if not api_key: logger.error("[Web Scrape Fallback 3] RapidAPI key missing."); return None
436
  logger.info(f"[Web Scrape Fallback 3] Attempting fetch for: {url} using AI Web Scraper API")
@@ -467,7 +432,6 @@ async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Opti
467
  except Exception as e: logger.error(f"[Web Scrape Fallback 3] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None
468
 
469
  async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: str, actor_name: str) -> Optional[str]:
470
- """Generic function to run an Apify actor and get text content."""
471
  if not url: logger.error(f"[{actor_name}] No URL provided"); return None
472
  if not api_token: logger.error(f"[{actor_name}] API token missing."); return None
473
  logger.info(f"[{actor_name}] Attempting fetch for URL: {url} (Actor: {actor_id})")
@@ -510,16 +474,14 @@ async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: s
510
  except Exception as e: logger.error(f"[{actor_name}] Unexpected error during {actor_name} call for {url}: {e}", exc_info=True); return None
511
 
512
  async def get_website_content_via_apify_crawler(url: str, api_token: str) -> Optional[str]:
513
- """Fallback 4: Fetches website content using Apify Website Content Crawler."""
514
  return await _run_apify_actor_for_web_content( url=url, api_token=api_token, actor_id=APIFY_CRAWLER_ACTOR_ID, actor_name="Apify Crawler" )
515
 
516
  async def get_website_content_via_apify_text_scraper(url: str, api_token: str) -> Optional[str]:
517
- """Fallback 5: Fetches website content using Apify Text Scraper Free."""
518
  return await _run_apify_actor_for_web_content( url=url, api_token=api_token, actor_id=APIFY_TEXT_SCRAPER_ACTOR_ID, actor_name="Apify Text Scraper" )
519
  # --- END OMITTED CONTENT FETCHING FUNCTIONS ---
520
 
521
 
522
- # --- Summarization Functions (REVISED SECTION) ---
523
 
524
  # --- Prompts (Defined once, used by all models) ---
525
  PROMPT_PARAGRAPH = (
@@ -554,38 +516,33 @@ PROMPT_POINTS = (
554
  "Here is the text to summarise:"
555
  )
556
 
557
- # <<< CHANGE: New function for Groq API Call >>>
558
  async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
559
- """Internal function to call Groq API (Primary). Returns (summary, error_message)."""
560
- global GROQ_API_KEY, GROQ_LLAMA_MODEL, _groq_enabled
561
  if not _groq_enabled:
562
  logger.error("[Groq Primary] Called but is disabled.");
563
- return None, "Error: Primary AI service (Groq Llama) not configured/available."
564
- logger.info(f"[Groq Primary] Generating {summary_type} summary using {GROQ_LLAMA_MODEL}. Input length: {len(text)}")
565
 
566
  prompt = PROMPT_PARAGRAPH if summary_type == "paragraph" else PROMPT_POINTS
567
 
568
- # Input Length Check (Llama 3 70b on Groq supports 8192 tokens - text length is a rough proxy)
569
- # Let's be conservative, ~3 chars per token -> ~24k chars. Use 20k for safety margin.
570
- MAX_INPUT_LENGTH_GROQ = 20000
571
  if len(text) > MAX_INPUT_LENGTH_GROQ:
572
  logger.warning(f"[Groq Primary] Input length ({len(text)}) exceeds estimated limit ({MAX_INPUT_LENGTH_GROQ}). Truncating.");
573
  text = text[:MAX_INPUT_LENGTH_GROQ] + "... (Content truncated)"
574
  full_prompt = f"{prompt}\n\n{text}"
575
 
576
  try:
577
- # Initialize client within the function with timeout
578
- groq_client = Groq(
579
- api_key=GROQ_API_KEY,
580
- timeout=httpx.Timeout(120.0, connect=10.0) # 120s read timeout, 10s connect
581
- )
582
- logger.info(f"[Groq Primary] Sending request to Groq ({GROQ_LLAMA_MODEL})...")
583
 
584
  chat_completion = await groq_client.chat.completions.create(
585
  messages=[ { "role": "user", "content": full_prompt } ],
586
- model=GROQ_LLAMA_MODEL,
587
- temperature=0.7, # Adjust temperature as needed (0.5-0.8 is often good for summaries)
588
- max_tokens=2048, # Max tokens for the *output* summary
589
  top_p=1,
590
  stream=False,
591
  stop=None,
@@ -598,30 +555,27 @@ async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optio
598
  logger.info(f"[Groq Primary] Success generating summary. Finish Reason: {finish_reason}. Output len: {len(summary)}");
599
  return summary.strip(), None
600
  else:
601
- # Handle cases where response structure is unexpected or content is empty
602
  logger.warning(f"[Groq Primary] Groq response structure unexpected or content empty. Response: {chat_completion.model_dump_json(indent=2)}")
603
  finish_reason = chat_completion.choices[0].finish_reason if chat_completion.choices else 'N/A'
604
- return None, f"Sorry, the primary AI model ({GROQ_LLAMA_MODEL}) provided an empty or invalid response (Finish Reason: {finish_reason})."
605
 
606
  except GroqError as ge:
607
- # Handle API-specific errors (rate limits, auth issues etc.)
608
- logger.error(f"[Groq Primary] Groq API error: {ge.status_code} - {ge.message}", exc_info=False) # Keep log cleaner for common errors
609
- error_msg = f"Sorry, the primary AI service ({GROQ_LLAMA_MODEL}) failed. API Error: {ge.status_code}."
610
- if ge.status_code == 401: error_msg = "Error: Primary AI service (Groq) API key is invalid."
611
- elif ge.status_code == 429: error_msg = f"Sorry, primary AI model ({GROQ_LLAMA_MODEL}) is busy (Rate Limit). Try again."
612
  return None, error_msg
613
  except httpx.TimeoutException as te:
614
  logger.error(f"[Groq Primary] Timeout during Groq API call: {te}")
615
- return None, f"Sorry, the primary AI service ({GROQ_LLAMA_MODEL}) timed out."
616
  except httpx.RequestError as re:
617
  logger.error(f"[Groq Primary] Network error during Groq API call: {re}")
618
- return None, f"Sorry, couldn't connect to the primary AI service ({GROQ_LLAMA_MODEL})."
619
  except Exception as e:
620
  logger.error(f"[Groq Primary] Unexpected error during Groq API call: {e}", exc_info=True);
621
- return None, f"Sorry, an unexpected error occurred while using the primary AI service ({GROQ_LLAMA_MODEL})."
622
 
623
 
624
- # <<< CHANGE: Modified function to accept model_name >>>
625
  async def _call_gemini(text: str, summary_type: str, model_name: str) -> Tuple[Optional[str], Optional[str]]:
626
  """Internal function to call Gemini API. Returns (summary, error_message)."""
627
  global _gemini_api_enabled
@@ -631,22 +585,15 @@ async def _call_gemini(text: str, summary_type: str, model_name: str) -> Tuple[O
631
  logger.info(f"[Gemini {model_name}] Generating {summary_type} summary using {model_name}. Input length: {len(text)}")
632
 
633
  prompt = PROMPT_PARAGRAPH if summary_type == "paragraph" else PROMPT_POINTS
634
-
635
- # Input Length Check (Gemini 1.5 has large context, but let's keep a practical limit)
636
- MAX_INPUT_LENGTH_GEMINI = 900000 # Keep previous limit, seems reasonable
637
  if len(text) > MAX_INPUT_LENGTH_GEMINI:
638
  logger.warning(f"[Gemini {model_name}] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH_GEMINI}). Truncating.");
639
  text = text[:MAX_INPUT_LENGTH_GEMINI] + "... (Content truncated)"
640
  full_prompt = f"{prompt}\n\n{text}"
641
-
642
- # Safety Settings (Block None)
643
  safety_settings = { HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, }
644
- # <<< CHANGE: Removed specific check for CIVIC_INTEGRITY, rely on hasattr instead >>>
645
- # Handle potential new categories gracefully
646
  for category_name in dir(HarmCategory):
647
  if category_name.startswith('HARM_CATEGORY_') and getattr(HarmCategory, category_name) not in safety_settings:
648
  safety_settings[getattr(HarmCategory, category_name)] = HarmBlockThreshold.BLOCK_NONE
649
-
650
  logger.debug(f"[Gemini {model_name}] Using safety settings: { {k.name: v.name for k, v in safety_settings.items()} }")
651
 
652
  try:
@@ -655,89 +602,77 @@ async def _call_gemini(text: str, summary_type: str, model_name: str) -> Tuple[O
655
  logger.info(f"[Gemini {model_name}] Sending request to Gemini ({model_name})...")
656
  request_options = {"timeout": 120}
657
  response = await model.generate_content_async(
658
- full_prompt,
659
- generation_config=genai.types.GenerationConfig(), # Basic config
660
- safety_settings=safety_settings,
661
- request_options=request_options
662
- )
663
  logger.info(f"[Gemini {model_name}] Received response from Gemini.")
664
-
665
- # Check for immediate blocking reasons
666
  if response.prompt_feedback and response.prompt_feedback.block_reason:
667
  block_reason_str = getattr(response.prompt_feedback.block_reason, 'name', str(response.prompt_feedback.block_reason))
668
  logger.warning(f"[Gemini {model_name}] Request blocked by API. Reason: {block_reason_str}");
669
  return None, f"Sorry, the AI model ({model_name}) blocked the request (Reason: {block_reason_str})."
670
-
671
- # Check candidate-level blocking and extract text safely
672
- summary = None
673
- finish_reason_str = 'UNKNOWN'
674
  if response.candidates:
675
  candidate = response.candidates[0]
676
  finish_reason_name = getattr(candidate.finish_reason, 'name', None)
677
  finish_reason_str = finish_reason_name or 'N/A'
678
-
679
  if finish_reason_name == 'SAFETY':
680
  safety_ratings_str = ", ".join([f"{rating.category.name}: {rating.probability.name}" for rating in candidate.safety_ratings])
681
  logger.warning(f"[Gemini {model_name}] Candidate blocked due to SAFETY. Finish Reason: {finish_reason_str}. Ratings: [{safety_ratings_str}]")
682
  return None, f"Sorry, the AI model ({model_name}) blocked the response due to safety filters ({finish_reason_str})."
683
  elif finish_reason_name not in ['STOP', 'MAX_TOKENS', None]:
684
  logger.warning(f"[Gemini {model_name}] Candidate finished with non-standard reason: {finish_reason_str}")
685
-
686
- # Safely access content text
687
  if candidate.content and candidate.content.parts:
688
  summary = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
689
-
690
- # Fallback check via response.text
691
  if summary is None:
692
  try: summary = response.text
693
- except ValueError as e: logger.warning(f"[Gemini {model_name}] Error accessing response.text (likely blocked content based on previous checks): {e}"); summary = None
694
-
695
  if summary:
696
  logger.info(f"[Gemini {model_name}] Success generating summary. Finish Reason: {finish_reason_str}. Output len: {len(summary)}");
697
  return summary.strip(), None
698
  else:
699
  logger.warning(f"[Gemini {model_name}] Gemini returned empty summary or content was blocked. Final Finish Reason: {finish_reason_str}");
700
  return None, f"Sorry, the AI model ({model_name}) did not provide a summary (Finish Reason: {finish_reason_str})."
701
-
702
  except AttributeError as ae:
703
  logger.error(f"[Gemini {model_name}] AttributeError during Gemini response processing: {ae}. SDK might be incompatible or response structure unexpected.", exc_info=True);
704
  return None, f"Sorry, there was an issue processing the response from the AI service ({model_name})."
705
  except Exception as e:
706
  logger.error(f"[Gemini {model_name}] Unexpected error during Gemini API call: {e}", exc_info=True);
707
- # Check for specific Gemini API errors if possible (e.g., AuthenticationFailed, RateLimitExceeded)
708
- # This might require inspecting the error details or type.
709
  error_msg = f"Sorry, an unexpected error occurred while using the AI service ({model_name})."
710
- # Example: if "API key not valid" in str(e): error_msg = "Error: AI service (Gemini) API key is invalid."
711
  return None, error_msg
712
 
713
-
714
- # <<< CHANGE: Function remains the same, but is now the last fallback >>>
715
  async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
716
- """Internal function to call OpenRouter API (Final Fallback). Returns (summary, error_message)."""
717
- global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
718
  if not _openrouter_fallback_enabled:
719
  logger.error("[OpenRouter Fallback] Called but is disabled.");
720
  return None, "Error: Final fallback AI service (OpenRouter) not configured/available."
721
- logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
722
 
723
  prompt = PROMPT_PARAGRAPH if summary_type == "paragraph" else PROMPT_POINTS
724
-
725
- # Input Length Check (Adjust if DeepSeek model limit is known, 100k is generous)
726
- MAX_INPUT_LENGTH_OR = 100000
727
  if len(text) > MAX_INPUT_LENGTH_OR:
728
- logger.warning(f"[OpenRouter Fallback] Input length ({len(text)}) exceeds estimated limit ({MAX_INPUT_LENGTH_OR}) for {OPENROUTER_MODEL}. Truncating.");
729
  text = text[:MAX_INPUT_LENGTH_OR] + "... (Content truncated)"
730
  full_prompt = f"{prompt}\n\n{text}"
731
 
732
- headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json", "HTTP-Referer": "https://github.com/your-repo", "X-Title": "TelegramSummariserBot" }
733
- payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}], }
 
 
 
 
 
 
 
 
 
734
  openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
735
- api_timeouts = httpx.Timeout(connect=10.0, read=60.0, write=10.0, pool=60.0) # Slightly longer read timeout for fallback
736
  response = None
737
 
738
  try:
739
  async with httpx.AsyncClient(timeout=api_timeouts) as client:
740
- logger.info(f"[OpenRouter Fallback] Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
741
  response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
742
  logger.info(f"[OpenRouter Fallback] Received response from OpenRouter. Status code: {response.status_code}")
743
 
@@ -749,116 +684,112 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str],
749
  if message and isinstance(message, dict):
750
  summary = message.get("content")
751
  if summary: logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Finish: {finish_reason}. Output len: {len(summary)}"); return summary.strip(), None
752
- else: logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Finish: {finish_reason}. Resp: {data}"); return None, f"Sorry, the fallback AI model ({OPENROUTER_MODEL}) returned an empty summary (Finish: {finish_reason})."
753
  else: logger.error(f"[OpenRouter Fallback] Unexpected message structure: {message}. Finish: {finish_reason}. Full: {data}"); return None, "Sorry, could not parse fallback AI response (message format)."
754
  else:
755
  error_details = data.get("error", {}); logger.error(f"[OpenRouter Fallback] Unexpected choices structure or error in response: {data.get('choices')}. Error: {error_details}. Full: {data}");
756
  return None, f"Sorry, could not parse fallback AI response (choices structure or error: {error_details.get('message', 'Unknown')})."
757
  except json.JSONDecodeError: logger.error(f"[OpenRouter Fallback] Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return None, "Sorry, failed to understand fallback AI response."
758
  except Exception as e: logger.error(f"[OpenRouter Fallback] Error processing OpenRouter success response: {e}", exc_info=True); return None, "Sorry, error processing fallback AI response."
759
- elif response.status_code == 401: logger.error("[OpenRouter Fallback] API key invalid (401)."); return None, "Error: Fallback AI model configuration key is invalid."
760
- elif response.status_code == 402: logger.error("[OpenRouter Fallback] Payment Required/Quota Exceeded (402)."); return None, f"Sorry, fallback AI service ({OPENROUTER_MODEL}) quota/limit issue."
761
- elif response.status_code == 429: logger.warning("[OpenRouter Fallback] Rate Limit Exceeded (429)."); return None, f"Sorry, fallback AI model ({OPENROUTER_MODEL}) is busy. Try again."
762
- elif response.status_code == 500: logger.error(f"[OpenRouter Fallback] Internal Server Error (500). Resp:{response.text[:500]}"); return None, f"Sorry, fallback AI service ({OPENROUTER_MODEL}) had an internal error."
763
  else:
764
  error_info = "";
765
  try: error_info = response.json().get("error", {}).get("message", "")
766
  except Exception: pass
767
  logger.error(f"[OpenRouter Fallback] Unexpected status {response.status_code}. Error: '{error_info}' Resp:{response.text[:500]}");
768
- return None, f"Sorry, fallback AI service ({OPENROUTER_MODEL}) returned unexpected status ({response.status_code})."
769
 
770
- except httpx.TimeoutException as e: logger.error(f"[OpenRouter Fallback] Timeout error ({type(e)}) connecting/reading from OpenRouter API: {e}"); return None, f"Sorry, the fallback AI service ({OPENROUTER_MODEL}) timed out."
771
  except httpx.RequestError as e: logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}"); return None, "Sorry, there was an error connecting to the fallback AI model service."
772
  except Exception as e: logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter call: {e}", exc_info=True); return None, "Sorry, an unexpected error occurred while using the fallback AI service."
773
 
774
 
775
- # <<< CHANGE: Rewritten function for new model hierarchy >>>
776
  async def generate_summary(text: str, summary_type: str) -> str:
777
  """
778
- Generates summary using the defined model hierarchy:
779
- 1. Groq (Llama 4/3)
780
- 2. Gemini (2.5 Pro)
781
  3. Gemini (2.0 Flash)
782
- 4. OpenRouter (DeepSeek)
783
  Returns the summary text or a comprehensive error message.
784
  """
785
  global _groq_enabled, _gemini_api_enabled, _openrouter_fallback_enabled
786
- global GROQ_LLAMA_MODEL, GEMINI_PRO_MODEL, GEMINI_FLASH_MODEL, OPENROUTER_MODEL
787
 
788
- logger.info("[Summary Generation] Starting process with new model hierarchy.")
789
  summary: Optional[str] = None
790
- errors: Dict[str, Optional[str]] = { # Store errors from each step
791
- "Groq": None,
792
- "GeminiPro": None,
793
  "GeminiFlash": None,
794
- "OpenRouter": None,
795
  }
796
 
797
- # --- Attempt 1: Groq (Primary) ---
798
  if _groq_enabled:
799
- logger.info(f"[Summary Generation] Attempting 1: Groq ({GROQ_LLAMA_MODEL})")
800
- summary, errors["Groq"] = await _call_groq(text, summary_type)
801
  if summary:
802
- logger.info(f"[Summary Generation] Success with Groq ({GROQ_LLAMA_MODEL}).")
803
  return summary
804
  else:
805
- logger.warning(f"[Summary Generation] Groq failed. Error: {errors['Groq']}. Proceeding to Gemini Pro.")
806
  else:
807
- logger.warning("[Summary Generation] Groq is disabled or unavailable. Skipping.")
808
- errors["Groq"] = "Service disabled/unavailable."
809
 
810
- # --- Attempt 2: Gemini 2.5 Pro ---
811
  if _gemini_api_enabled:
812
- logger.info(f"[Summary Generation] Attempting 2: Gemini ({GEMINI_PRO_MODEL})")
813
- summary, errors["GeminiPro"] = await _call_gemini(text, summary_type, GEMINI_PRO_MODEL)
814
  if summary:
815
- logger.info(f"[Summary Generation] Success with Gemini ({GEMINI_PRO_MODEL}).")
816
  return summary
817
  else:
818
- logger.warning(f"[Summary Generation] Gemini Pro failed. Error: {errors['GeminiPro']}. Proceeding to Gemini Flash.")
819
  else:
820
- logger.warning("[Summary Generation] Gemini API is disabled or unavailable. Skipping Gemini Pro & Flash.")
821
- errors["GeminiPro"] = "Service disabled/unavailable."
822
- errors["GeminiFlash"] = "Service disabled/unavailable." # Also skip Flash if API is down
823
 
824
  # --- Attempt 3: Gemini 2.0 Flash ---
825
- if _gemini_api_enabled and errors["GeminiFlash"] is None: # Only attempt if API enabled and not already marked unavailable
826
  logger.info(f"[Summary Generation] Attempting 3: Gemini ({GEMINI_FLASH_MODEL})")
827
  summary, errors["GeminiFlash"] = await _call_gemini(text, summary_type, GEMINI_FLASH_MODEL)
828
  if summary:
829
  logger.info(f"[Summary Generation] Success with Gemini ({GEMINI_FLASH_MODEL}).")
830
  return summary
831
  else:
832
- logger.warning(f"[Summary Generation] Gemini Flash failed. Error: {errors['GeminiFlash']}. Proceeding to OpenRouter.")
833
- elif errors["GeminiFlash"] is None: # Should have been marked unavailable above if _gemini_api_enabled was false
834
- logger.warning("[Summary Generation] Skipping Gemini Flash (API was disabled).")
835
  errors["GeminiFlash"] = "Service disabled/unavailable."
836
 
837
-
838
- # --- Attempt 4: OpenRouter (Final Fallback) ---
839
  if _openrouter_fallback_enabled:
840
- logger.info(f"[Summary Generation] Attempting 4: OpenRouter ({OPENROUTER_MODEL})")
841
- summary, errors["OpenRouter"] = await _call_openrouter(text, summary_type)
842
  if summary:
843
- logger.info(f"[Summary Generation] Success with OpenRouter ({OPENROUTER_MODEL}).")
844
  return summary
845
  else:
846
- logger.error(f"[Summary Generation] OpenRouter (Final Fallback) also failed. Error: {errors['OpenRouter']}")
847
  else:
848
- logger.error("[Summary Generation] OpenRouter fallback is disabled or unavailable. Cannot proceed.")
849
- errors["OpenRouter"] = "Service disabled/unavailable."
850
 
851
  # --- All Attempts Failed ---
852
  logger.error("[Summary Generation] All summarization models failed.")
853
- # Construct a final error message
854
  error_details = "\n".join([f"- {model}: {err}" for model, err in errors.items() if err])
855
  return f"Sorry, I couldn't generate a summary after trying all available AI models.\nDetails:\n{error_details}"
856
 
857
 
858
  # --- Main Processing Logic ---
859
- # (process_summary_task remains UNCHANGED in its core logic of fetching content,
860
- # but it now calls the updated `generate_summary` function.
861
- # Omitted here for brevity, but should be included in the final file.)
862
  # --- START OMITTED process_summary_task ---
863
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
864
  """Handles the entire process: fetching content (with ALL fallbacks) and summarizing."""
@@ -897,12 +828,12 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
897
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
898
  if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
899
  else:
900
- global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN, _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists # <<< Added globals
901
 
902
  logger.info(f"[Task {task_id}] Trying Web Scrape Method 1 (Direct Fetch + BS4)..."); content = await get_website_content(url)
903
  if not content:
904
  logger.warning(f"[Task {task_id}] Method 1 failed. Trying Method 2 (urltotext.com)...")
905
- if _urltotext_key_exists: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing'); content = await get_website_content_via_api(url, URLTOTEXT_API_KEY);
906
  else: logger.warning("[Task {task_id}] Method 2 (urltotext.com) API key unavailable. Skipping.")
907
  if not content:
908
  logger.warning(f"[Task {task_id}] Method 2 failed. Trying Method 3 (Scraper's Proxy)...")
@@ -932,8 +863,7 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
932
  except Exception as edit_e: logger.warning(f"[Task {task_id}] Failed to edit status message before summary: {edit_e}")
933
 
934
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
935
- # <<< CHANGE: Calls the updated generate_summary function >>>
936
- final_summary = await generate_summary(content, summary_type)
937
 
938
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
939
  user_feedback_message = final_summary
@@ -978,8 +908,7 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
978
 
979
  # --- Telegram Handlers ---
980
  # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler
981
- # remain UNCHANGED, except for a small modification in handle_summary_type_callback
982
- # to check the new availability flags. Omitted here for brevity, but include in final file.)
983
  # --- START OMITTED TELEGRAM HANDLERS ---
984
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
985
  user = update.effective_user; mention = user.mention_html()
@@ -996,7 +925,7 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> No
996
  "2. I'll ask how you want it summarised (paragraph or points).\n"
997
  "3. Click the button for your choice.\n"
998
  "4. Wait while I fetch the content and generate the summary!\n\n"
999
- "βš™οΈ I try multiple methods to get content, especially for tricky websites or YouTube videos without standard transcripts. I then use a sequence of AI models (Llama, Gemini Pro, Gemini Flash, DeepSeek) to summarise.\n\n" # <<< Updated help text slightly
1000
  "**Commands:**\n"
1001
  "`/start` - Display the welcome message\n"
1002
  "`/help` - Show this help message" )
@@ -1054,7 +983,7 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
1054
 
1055
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
1056
 
1057
- # <<< CHANGE: Check essential configurations based on new flags >>>
1058
  global TELEGRAM_TOKEN, _groq_enabled, _gemini_api_enabled, _openrouter_fallback_enabled
1059
  if not TELEGRAM_TOKEN:
1060
  logger.critical("TELEGRAM_TOKEN missing in callback!")
@@ -1066,16 +995,10 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF
1066
  try: await query.edit_message_text(text="❌ AI configuration error: No summarization models available.")
1067
  except Exception: pass
1068
  return
1069
- # Log warnings if primary models are unavailable but don't stop the process if fallbacks exist
1070
- if not _groq_enabled: logger.warning("Primary AI (Groq) is unavailable, will start with Gemini Pro.")
1071
- if not _gemini_api_enabled: logger.warning("Gemini API is unavailable, will rely on Groq and/or OpenRouter.")
1072
- if not _openrouter_fallback_enabled and not (_groq_enabled or _gemini_api_enabled) :
1073
- # This case should already be caught above, but as a safeguard
1074
- logger.critical("No models available at all!")
1075
- try: await query.edit_message_text(text="❌ AI configuration error: No summarization models available.")
1076
- except Exception: pass
1077
- return
1078
-
1079
 
1080
  logger.info(f"Scheduling background task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
1081
  asyncio.create_task(
@@ -1097,10 +1020,10 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
1097
 
1098
 
1099
  # --- Application Setup & Web Framework ---
1100
- # (setup_bot_config, lifespan, health_check, telegram_webhook, app definition
1101
- # remain mostly UNCHANGED, except for updating the health_check response string.
1102
  # Omitted here for brevity, include in final file.)
1103
- # --- START OMITTED APP SETUP/WEB FRAMEWORK ---
1104
  async def setup_bot_config() -> Application:
1105
  logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
1106
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
@@ -1170,10 +1093,35 @@ async def lifespan(app: Starlette):
1170
  else: logger.info("PTB application was not fully initialized or failed during startup. No shutdown actions needed.")
1171
  logger.info("ASGI Lifespan: Shutdown complete.")
1172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1173
  async def health_check(request: Request) -> PlainTextResponse:
1174
  """Simple health check endpoint."""
1175
- # <<< CHANGE: Updated health check response format >>>
1176
- global GROQ_LLAMA_MODEL, GEMINI_PRO_MODEL, GEMINI_FLASH_MODEL, OPENROUTER_MODEL, APIFY_ACTOR_ID
1177
  global _groq_enabled, _gemini_api_enabled, _openrouter_fallback_enabled
1178
  global _apify_token_exists, _urltotext_key_exists, _rapidapi_key_exists, SUPADATA_API_KEY
1179
 
@@ -1191,15 +1139,15 @@ async def health_check(request: Request) -> PlainTextResponse:
1191
  bot_status = f"Error checking status: {type(e).__name__}"; logger.warning(f"Health check: Error getting bot info: {e}")
1192
  else: bot_status = "Not Initialized"; bot_username = "N/A"
1193
 
1194
- # <<< CHANGE: Update response string with new model order >>>
1195
  return PlainTextResponse(
1196
  f"TG Bot Summariser - Status: {bot_status} ({bot_username})\n"
1197
  f"---\n"
1198
- f"Summarizer Priority:\n"
1199
- f"1. Groq API: {GROQ_LLAMA_MODEL if _groq_enabled else 'DISABLED'}\n"
1200
- f"2. Gemini API (Pro): {GEMINI_PRO_MODEL if _gemini_api_enabled else 'DISABLED'}\n"
1201
- f"3. Gemini API (Flash): {GEMINI_FLASH_MODEL if _gemini_api_enabled else 'DISABLED'}\n"
1202
- f"4. OpenRouter API: {OPENROUTER_MODEL if _openrouter_fallback_enabled else 'DISABLED'}\n"
1203
  f"---\n"
1204
  f"Content Fetching Status:\n"
1205
  f"YT Fallback 1 (Supadata): {'Enabled' if SUPADATA_API_KEY else 'Disabled'}\n"
@@ -1210,31 +1158,6 @@ async def health_check(request: Request) -> PlainTextResponse:
1210
  f"Web Scrape 5/6 (Apify Actors): {'Enabled' if _apify_token_exists else 'Disabled'}"
1211
  )
1212
 
1213
- async def telegram_webhook(request: Request) -> Response:
1214
- """Handles incoming updates from Telegram."""
1215
- global WEBHOOK_SECRET
1216
- if not ptb_app: logger.error("Webhook received but PTB application not initialized."); return PlainTextResponse('Bot not initialized', status_code=503)
1217
- if not ptb_app.running: logger.warning("Webhook received but PTB application not running."); return PlainTextResponse('Bot not running, cannot process update', status_code=503)
1218
- if WEBHOOK_SECRET:
1219
- token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
1220
- if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'"); return Response(content="Invalid secret token", status_code=403)
1221
- try:
1222
- update_data = await request.json(); update = Update.de_json(data=update_data, bot=ptb_app.bot)
1223
- logger.debug(f"Processing update_id: {update.update_id} via webhook"); await ptb_app.process_update(update)
1224
- return Response(status_code=200)
1225
- except json.JSONDecodeError: logger.error("Webhook received invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
1226
- except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200)
1227
-
1228
- # --- Starlette App Definition ---
1229
- app = Starlette(
1230
- debug=False,
1231
- lifespan=lifespan,
1232
- routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ]
1233
- )
1234
- logger.info("Starlette ASGI application created with health check and webhook routes.")
1235
- # --- END OMITTED APP SETUP/WEB FRAMEWORK ---
1236
-
1237
-
1238
  # --- Development Server (if run directly) ---
1239
  if __name__ == '__main__':
1240
  import uvicorn
 
1
+ # main.py (Updated for Specific April 2025 Models: Llama 4 Scout & DeepSeek V3 Free)
2
  import os
3
  import re
4
  import logging
 
7
  import html
8
  import contextlib
9
  import traceback
10
+ import urllib.parse
11
  from typing import Optional, Dict, Any, Tuple
12
 
13
  # --- Frameworks ---
 
45
  try:
46
  import google.generativeai as genai
47
  from google.generativeai.types import HarmCategory, HarmBlockThreshold
48
+ _gemini_sdk_available = True
49
  except ImportError:
50
  genai = None
51
  HarmCategory = None
52
  HarmBlockThreshold = None
53
  _gemini_sdk_available = False
 
54
 
55
+ # --- Groq SDK ---
56
  try:
57
  from groq import Groq, GroqError
58
  _groq_sdk_available = True
 
71
  logging.getLogger('uvicorn').setLevel(logging.INFO)
72
  logging.getLogger('starlette').setLevel(logging.INFO)
73
  if _gemini_sdk_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
74
+ if _groq_sdk_available: logging.getLogger("groq").setLevel(logging.INFO)
75
  logger = logging.getLogger(__name__)
76
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
77
  if not _gemini_sdk_available: logger.warning("google-generativeai library not found. Gemini functionality disabled.")
78
+ if not _groq_sdk_available: logger.warning("groq library not found. Groq functionality disabled.")
79
 
80
 
81
  # --- Global variable for PTB app ---
 
90
  return value
91
 
92
  TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
93
+ GROQ_API_KEY = get_secret('GROQ_API_KEY') # For Llama 4
94
+ GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # For Gemini 2.5 Pro and 2.0 Flash
95
+ OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY') # For DeepSeek
96
+ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
97
+ SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
98
+ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
99
+ RAPIDAPI_KEY = get_secret('RAPIDAPI_KEY')
100
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
101
 
102
+ # <<< CHANGE: Using EXACT Model Identifiers from User Docs >>>
103
  # Model Priority:
104
+ # 1. Groq Llama 4 Scout
105
+ # 2. Gemini 2.5 Pro Exp
106
  # 3. Gemini 2.0 Flash
107
+ # 4. OpenRouter DeepSeek V3 Free
108
+ GROQ_LLAMA4_MODEL = os.environ.get("GROQ_LLAMA4_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct") # <<< Specific Llama 4 model
109
+ GEMINI_PRO_EXP_MODEL = os.environ.get("GEMINI_PRO_EXP_MODEL", "gemini-2.5-pro-exp-03-25")
110
+ GEMINI_FLASH_MODEL = os.environ.get("GEMINI_FLASH_MODEL", "gemini-2.0-flash-001")
111
+ OPENROUTER_DEEPSEEK_MODEL = os.environ.get("OPENROUTER_DEEPSEEK_MODEL", "deepseek/deepseek-chat-v3-0324:free") # <<< Specific DeepSeek model
112
 
113
+ APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # YT Default
114
  APIFY_CRAWLER_ACTOR_ID = "apify/website-content-crawler" # Scrape Fallback 4
115
  APIFY_TEXT_SCRAPER_ACTOR_ID = "karamelo/text-scraper-free" # Scrape Fallback 5
116
 
 
119
 
120
  # Summarizer Availability Checks
121
  _groq_enabled = _groq_sdk_available and bool(GROQ_API_KEY)
122
+ _gemini_api_enabled = _gemini_sdk_available and bool(GEMINI_API_KEY)
123
  _openrouter_fallback_enabled = bool(OPENROUTER_API_KEY)
124
 
125
  if not _groq_enabled:
126
+ if not _groq_sdk_available: logger.error("❌ ERROR: groq library missing. Groq (Llama 4) disabled.")
127
+ elif not GROQ_API_KEY: logger.error("❌ ERROR: GROQ_API_KEY not found. Primary summarization (Groq Llama 4) will fail.")
128
  if not _gemini_api_enabled:
129
  if not _gemini_sdk_available: logger.warning("⚠️ WARNING: google-generativeai library missing. Gemini disabled.")
130
+ elif not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Gemini summarization fallbacks (2.5 Pro / 2.0 Flash) will fail.")
131
  if not _openrouter_fallback_enabled: logger.warning("⚠️ WARNING: OPENROUTER_API_KEY not found. Final fallback summarization (DeepSeek) will fail.")
132
 
133
  if not _groq_enabled and not _gemini_api_enabled and not _openrouter_fallback_enabled:
134
  logger.critical("❌ FATAL: No summarization models are configured or enabled. Bot cannot function.")
 
 
135
  elif not _groq_enabled:
136
+ logger.warning("⚠️ Primary summarizer (Groq Llama 4) is disabled. Will start with Gemini 2.5 Pro.")
137
 
138
  # Scraper Availability Checks (Warnings only)
139
  if not RAPIDAPI_KEY: logger.warning("⚠️ WARNING: RAPIDAPI_KEY not found. RapidAPI scraping fallbacks (2 & 3) will be unavailable.")
 
143
  if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found. Webhook security disabled.")
144
 
145
  logger.info("Secret loading and configuration check finished.")
146
+ logger.info(f"Summarizer 1 (Groq): {GROQ_LLAMA4_MODEL if _groq_enabled else 'DISABLED'}")
147
+ logger.info(f"Summarizer 2 (Gemini Pro Exp): {GEMINI_PRO_EXP_MODEL if _gemini_api_enabled else 'DISABLED'}")
148
+ logger.info(f"Summarizer 3 (Gemini Flash): {GEMINI_FLASH_MODEL if _gemini_api_enabled else 'DISABLED'}")
149
+ logger.info(f"Summarizer 4 (OpenRouter): {OPENROUTER_DEEPSEEK_MODEL if _openrouter_fallback_enabled else 'DISABLED'}")
150
  logger.info(f"Using Apify Actor (YT Default): {APIFY_ACTOR_ID}")
151
  logger.info(f"Using Apify Actor (Web Scrape Fallback 4): {APIFY_CRAWLER_ACTOR_ID}")
152
  logger.info(f"Using Apify Actor (Web Scrape Fallback 5): {APIFY_TEXT_SCRAPER_ACTOR_ID}")
153
 
154
+ # Flags for scraper key existence
155
  _apify_token_exists = bool(APIFY_API_TOKEN)
156
  _urltotext_key_exists = bool(URLTOTEXT_API_KEY)
157
  _rapidapi_key_exists = bool(RAPIDAPI_KEY)
158
 
159
  # --- Configure APIs ---
160
+ if _gemini_api_enabled:
161
  try: genai.configure(api_key=GEMINI_API_KEY); logger.info("Google GenAI client configured successfully.")
162
  except Exception as e: logger.error(f"Failed to configure Google GenAI client: {e}"); _gemini_api_enabled = False
163
 
164
+ # Groq client is initialized per-request in the _call_groq function
165
 
166
  # --- Retry Decorator ---
167
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
 
184
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
185
  else: logger.warning(f"Could not extract YT ID from {url}"); return None
186
 
 
187
  # --- Content Fetching Functions ---
188
  # (These functions: get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript,
189
  # fetch_url_content_for_scrape, get_website_content, get_website_content_via_api,
190
  # get_website_content_via_scrapers_proxy, get_website_content_via_ai_web_scraper,
191
  # _run_apify_actor_for_web_content, get_website_content_via_apify_crawler,
192
  # get_website_content_via_apify_text_scraper remain UNCHANGED. They are omitted here for brevity
193
+ # but MUST be included in the final main.py file)
194
  # --- START OMITTED CONTENT FETCHING FUNCTIONS ---
195
  async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
196
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
 
224
 
225
  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
226
  """Fallback YT 2: Fetches YouTube transcript using default Apify Actor."""
227
+ global APIFY_ACTOR_ID
228
  if not video_url: logger.error("[Apify YT] No video_url provided"); return None
229
  if not api_token: logger.error("[Apify YT] API token missing."); return None
230
  logger.info(f"[YT Fallback 2] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
 
231
  sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
232
  params = {"token": api_token}
233
+ payload = { "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5, "channelHandleBoolean": False, "channelNameBoolean": False, "datePublishedBoolean": False, "relativeDateTextBoolean": False, }
 
 
 
 
 
 
 
 
234
  headers = {"Content-Type": "application/json"}
 
235
  try:
236
+ async with httpx.AsyncClient(timeout=120.0) as client:
237
  logger.debug(f"[Apify YT] POST Request to {sync_items_endpoint} for {video_url}")
238
  response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
239
  logger.debug(f"[Apify YT] Received status code {response.status_code} for {video_url}")
 
240
  if response.status_code == 200:
241
  try:
242
  results = response.json()
243
  if isinstance(results, list) and len(results) > 0:
244
+ item = results[0]; content = None
 
 
245
  if "captions" in item and isinstance(item["captions"], str): content = item["captions"]
246
  elif "text" in item and isinstance(item["text"], str): content = item["text"]
247
  elif "transcript" in item and isinstance(item["transcript"], str): content = item["transcript"]
248
+ elif "captions" in item and isinstance(item["captions"], list):
249
  if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text"))
250
  elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"])
 
251
  if content and isinstance(content, str): logger.info(f"[Apify YT] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
252
  else: logger.warning(f"[Apify YT] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None
253
  else: logger.warning(f"[Apify YT] Actor success but dataset was empty for {video_url}. Response: {results}"); return None
 
263
  except Exception as e: logger.error(f"[Apify YT] Unexpected error during Apify YT call for {video_url}: {e}", exc_info=True); return None
264
 
265
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
266
+ global SUPADATA_API_KEY, APIFY_API_TOKEN, _apify_token_exists
267
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
268
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
269
  transcript_text = None
 
270
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
271
  try:
272
  transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] )
 
276
  except NoTranscriptFound: logger.warning(f"[Primary YT] No transcript found via lib for {video_id}.")
277
  except TranscriptsDisabled: logger.warning(f"[Primary YT] Transcripts disabled via lib for {video_id}.")
278
  except Exception as e: logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}"); transcript_text = None
 
 
279
  if transcript_text is None:
280
  logger.info("[Fallback YT 1] Trying Supadata API...")
281
  if SUPADATA_API_KEY:
 
283
  if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
284
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
285
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
 
 
286
  if transcript_text is None:
287
  logger.info("[Fallback YT 2] Trying Apify REST API (Default YT Actor)...")
288
+ if _apify_token_exists:
289
  transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
290
  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify Default YT Actor for {video_url}"); return transcript_text
291
  else: logger.warning(f"[Fallback YT 2] Apify Default YT Actor failed or no content for {video_url}.")
292
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
 
 
293
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
294
  return transcript_text
295
 
296
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
 
297
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
298
  try:
299
  async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
300
  logger.debug(f"[Web Scrape Direct] Sending GET request to {url}")
301
  response = await client.get(url)
302
  logger.debug(f"[Web Scrape Direct] Received response {response.status_code} from {url}")
303
+ response.raise_for_status()
304
  content_type = response.headers.get('content-type', '').lower()
305
+ if 'html' not in content_type: logger.warning(f"[Web Scrape Direct] Non-HTML content type received from {url}: {content_type}"); return None
 
 
306
  try: return response.text
307
  except Exception as e: logger.error(f"[Web Scrape Direct] Error decoding response text for {url}: {e}"); return None
308
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape Direct] HTTP error {e.response.status_code} fetching {url}: {e}")
 
313
  return None
314
 
315
  async def get_website_content(url: str) -> Optional[str]:
 
316
  if not url: logger.error("[Web Scrape Primary] No URL provided"); return None
317
  logger.info(f"[Web Scrape Primary] Attempting direct fetch and parse for: {url}")
318
  html_content = await fetch_url_content_for_scrape(url)
319
+ if not html_content: logger.warning(f"[Web Scrape Primary] Direct fetch failed for {url}."); return None
 
 
320
  try:
321
  def parse_html(content: str) -> Optional[str]:
322
  try:
323
  soup = BeautifulSoup(content, DEFAULT_PARSER)
324
+ for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "picture", "source", "map", "area"]): element.extract()
 
325
  main_content = soup.find('main') or soup.find('article') or soup.find(role='main') or soup.find(id=re.compile(r'content|main|body', re.I)) or soup.find(class_=re.compile(r'content|main|body|article|post', re.I))
326
  target_element = main_content if main_content else soup.body
327
  if not target_element: logger.warning(f"[Web Scrape Primary Parse] Could not find body or main content container for {url}"); return None
 
336
  except Exception as e: logger.error(f"[Web Scrape Primary] Unexpected error during parsing process for {url}: {e}", exc_info=True); return None
337
 
338
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
 
339
  if not url: logger.error("[Web Scrape Fallback 1] No URL"); return None
340
  if not api_key: logger.error("[Web Scrape Fallback 1] urltotext.com API key missing."); return None
341
  logger.info(f"[Web Scrape Fallback 1] Attempting fetch for: {url} using urltotext.com API")
 
363
  except Exception as e: logger.error(f"[Web Scrape Fallback 1] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
364
 
365
  async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Optional[str]:
 
366
  if not url: logger.error("[Web Scrape Fallback 2] No URL provided"); return None
367
  if not api_key: logger.error("[Web Scrape Fallback 2] RapidAPI key missing."); return None
368
  logger.info(f"[Web Scrape Fallback 2] Attempting fetch for: {url} using Scraper's Proxy Parser API")
 
396
  except Exception as e: logger.error(f"[Web Scrape Fallback 2] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None
397
 
398
  async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Optional[str]:
 
399
  if not url: logger.error("[Web Scrape Fallback 3] No URL provided"); return None
400
  if not api_key: logger.error("[Web Scrape Fallback 3] RapidAPI key missing."); return None
401
  logger.info(f"[Web Scrape Fallback 3] Attempting fetch for: {url} using AI Web Scraper API")
 
432
  except Exception as e: logger.error(f"[Web Scrape Fallback 3] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None
433
 
434
  async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: str, actor_name: str) -> Optional[str]:
 
435
  if not url: logger.error(f"[{actor_name}] No URL provided"); return None
436
  if not api_token: logger.error(f"[{actor_name}] API token missing."); return None
437
  logger.info(f"[{actor_name}] Attempting fetch for URL: {url} (Actor: {actor_id})")
 
474
  except Exception as e: logger.error(f"[{actor_name}] Unexpected error during {actor_name} call for {url}: {e}", exc_info=True); return None
475
 
476
async def get_website_content_via_apify_crawler(url: str, api_token: str) -> Optional[str]:
    """Web-scrape fallback: fetch page text via the generic Apify crawler actor.

    Thin wrapper around _run_apify_actor_for_web_content with the crawler
    actor id baked in; returns the extracted text or None on failure.
    """
    return await _run_apify_actor_for_web_content(
        url=url,
        api_token=api_token,
        actor_id=APIFY_CRAWLER_ACTOR_ID,
        actor_name="Apify Crawler",
    )
478
 
479
async def get_website_content_via_apify_text_scraper(url: str, api_token: str) -> Optional[str]:
    """Web-scrape fallback: fetch page text via the Apify text-scraper actor.

    Thin wrapper around _run_apify_actor_for_web_content with the text-scraper
    actor id baked in; returns the extracted text or None on failure.
    """
    return await _run_apify_actor_for_web_content(
        url=url,
        api_token=api_token,
        actor_id=APIFY_TEXT_SCRAPER_ACTOR_ID,
        actor_name="Apify Text Scraper",
    )
481
  # --- END OMITTED CONTENT FETCHING FUNCTIONS ---
482
 
483
 
484
+ # --- Summarization Functions (Using Specific April 2025 Models) ---
485
 
486
  # --- Prompts (Defined once, used by all models) ---
487
  PROMPT_PARAGRAPH = (
 
516
  "Here is the text to summarise:"
517
  )
518
 
519
+ # <<< Uses the specific GROQ_LLAMA4_MODEL constant >>>
520
  async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
521
+ """Internal function to call Groq API (Primary - Llama 4 Scout). Returns (summary, error_message)."""
522
+ global GROQ_API_KEY, GROQ_LLAMA4_MODEL, _groq_enabled
523
  if not _groq_enabled:
524
  logger.error("[Groq Primary] Called but is disabled.");
525
+ return None, f"Error: Primary AI service (Groq {GROQ_LLAMA4_MODEL}) not configured/available."
526
+ logger.info(f"[Groq Primary] Generating {summary_type} summary using {GROQ_LLAMA4_MODEL}. Input length: {len(text)}")
527
 
528
  prompt = PROMPT_PARAGRAPH if summary_type == "paragraph" else PROMPT_POINTS
529
 
530
+ # Input Length Check for Llama 4 Scout (16k context? Be conservative)
531
+ MAX_INPUT_LENGTH_GROQ = 40000 # ~13k tokens
 
532
  if len(text) > MAX_INPUT_LENGTH_GROQ:
533
  logger.warning(f"[Groq Primary] Input length ({len(text)}) exceeds estimated limit ({MAX_INPUT_LENGTH_GROQ}). Truncating.");
534
  text = text[:MAX_INPUT_LENGTH_GROQ] + "... (Content truncated)"
535
  full_prompt = f"{prompt}\n\n{text}"
536
 
537
  try:
538
+ groq_client = Groq( api_key=GROQ_API_KEY, timeout=httpx.Timeout(120.0, connect=10.0) )
539
+ logger.info(f"[Groq Primary] Sending request to Groq ({GROQ_LLAMA4_MODEL})...")
 
 
 
 
540
 
541
  chat_completion = await groq_client.chat.completions.create(
542
  messages=[ { "role": "user", "content": full_prompt } ],
543
+ model=GROQ_LLAMA4_MODEL, # <<< Use specific Llama 4 model name
544
+ temperature=0.7, # <<< Groq default is 1, adjust if needed
545
+ max_tokens=2048, # <<< Groq default is 1024, adjust if needed for longer summaries
546
  top_p=1,
547
  stream=False,
548
  stop=None,
 
555
  logger.info(f"[Groq Primary] Success generating summary. Finish Reason: {finish_reason}. Output len: {len(summary)}");
556
  return summary.strip(), None
557
  else:
 
558
  logger.warning(f"[Groq Primary] Groq response structure unexpected or content empty. Response: {chat_completion.model_dump_json(indent=2)}")
559
  finish_reason = chat_completion.choices[0].finish_reason if chat_completion.choices else 'N/A'
560
+ return None, f"Sorry, the primary AI model ({GROQ_LLAMA4_MODEL}) provided an empty or invalid response (Finish Reason: {finish_reason})."
561
 
562
  except GroqError as ge:
563
+ logger.error(f"[Groq Primary] Groq API error: {ge.status_code} - {ge.message}", exc_info=False)
564
+ error_msg = f"Sorry, the primary AI service ({GROQ_LLAMA4_MODEL}) failed. API Error: {ge.status_code}."
565
+ if ge.status_code == 401: error_msg = f"Error: Primary AI service (Groq {GROQ_LLAMA4_MODEL}) API key is invalid."
566
+ elif ge.status_code == 429: error_msg = f"Sorry, primary AI model ({GROQ_LLAMA4_MODEL}) is busy (Rate Limit). Try again."
 
567
  return None, error_msg
568
  except httpx.TimeoutException as te:
569
  logger.error(f"[Groq Primary] Timeout during Groq API call: {te}")
570
+ return None, f"Sorry, the primary AI service ({GROQ_LLAMA4_MODEL}) timed out."
571
  except httpx.RequestError as re:
572
  logger.error(f"[Groq Primary] Network error during Groq API call: {re}")
573
+ return None, f"Sorry, couldn't connect to the primary AI service ({GROQ_LLAMA4_MODEL})."
574
  except Exception as e:
575
  logger.error(f"[Groq Primary] Unexpected error during Groq API call: {e}", exc_info=True);
576
+ return None, f"Sorry, an unexpected error occurred while using the primary AI service ({GROQ_LLAMA4_MODEL})."
577
 
578
 
 
579
  async def _call_gemini(text: str, summary_type: str, model_name: str) -> Tuple[Optional[str], Optional[str]]:
580
  """Internal function to call Gemini API. Returns (summary, error_message)."""
581
  global _gemini_api_enabled
 
585
  logger.info(f"[Gemini {model_name}] Generating {summary_type} summary using {model_name}. Input length: {len(text)}")
586
 
587
  prompt = PROMPT_PARAGRAPH if summary_type == "paragraph" else PROMPT_POINTS
588
+ MAX_INPUT_LENGTH_GEMINI = 900000
 
 
589
  if len(text) > MAX_INPUT_LENGTH_GEMINI:
590
  logger.warning(f"[Gemini {model_name}] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH_GEMINI}). Truncating.");
591
  text = text[:MAX_INPUT_LENGTH_GEMINI] + "... (Content truncated)"
592
  full_prompt = f"{prompt}\n\n{text}"
 
 
593
  safety_settings = { HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, }
 
 
594
  for category_name in dir(HarmCategory):
595
  if category_name.startswith('HARM_CATEGORY_') and getattr(HarmCategory, category_name) not in safety_settings:
596
  safety_settings[getattr(HarmCategory, category_name)] = HarmBlockThreshold.BLOCK_NONE
 
597
  logger.debug(f"[Gemini {model_name}] Using safety settings: { {k.name: v.name for k, v in safety_settings.items()} }")
598
 
599
  try:
 
602
  logger.info(f"[Gemini {model_name}] Sending request to Gemini ({model_name})...")
603
  request_options = {"timeout": 120}
604
  response = await model.generate_content_async(
605
+ full_prompt, generation_config=genai.types.GenerationConfig(),
606
+ safety_settings=safety_settings, request_options=request_options )
 
 
 
607
  logger.info(f"[Gemini {model_name}] Received response from Gemini.")
 
 
608
  if response.prompt_feedback and response.prompt_feedback.block_reason:
609
  block_reason_str = getattr(response.prompt_feedback.block_reason, 'name', str(response.prompt_feedback.block_reason))
610
  logger.warning(f"[Gemini {model_name}] Request blocked by API. Reason: {block_reason_str}");
611
  return None, f"Sorry, the AI model ({model_name}) blocked the request (Reason: {block_reason_str})."
612
+ summary = None; finish_reason_str = 'UNKNOWN'
 
 
 
613
  if response.candidates:
614
  candidate = response.candidates[0]
615
  finish_reason_name = getattr(candidate.finish_reason, 'name', None)
616
  finish_reason_str = finish_reason_name or 'N/A'
 
617
  if finish_reason_name == 'SAFETY':
618
  safety_ratings_str = ", ".join([f"{rating.category.name}: {rating.probability.name}" for rating in candidate.safety_ratings])
619
  logger.warning(f"[Gemini {model_name}] Candidate blocked due to SAFETY. Finish Reason: {finish_reason_str}. Ratings: [{safety_ratings_str}]")
620
  return None, f"Sorry, the AI model ({model_name}) blocked the response due to safety filters ({finish_reason_str})."
621
  elif finish_reason_name not in ['STOP', 'MAX_TOKENS', None]:
622
  logger.warning(f"[Gemini {model_name}] Candidate finished with non-standard reason: {finish_reason_str}")
 
 
623
  if candidate.content and candidate.content.parts:
624
  summary = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
 
 
625
  if summary is None:
626
  try: summary = response.text
627
+ except ValueError as e: logger.warning(f"[Gemini {model_name}] Error accessing response.text (likely blocked): {e}"); summary = None
 
628
  if summary:
629
  logger.info(f"[Gemini {model_name}] Success generating summary. Finish Reason: {finish_reason_str}. Output len: {len(summary)}");
630
  return summary.strip(), None
631
  else:
632
  logger.warning(f"[Gemini {model_name}] Gemini returned empty summary or content was blocked. Final Finish Reason: {finish_reason_str}");
633
  return None, f"Sorry, the AI model ({model_name}) did not provide a summary (Finish Reason: {finish_reason_str})."
 
634
  except AttributeError as ae:
635
  logger.error(f"[Gemini {model_name}] AttributeError during Gemini response processing: {ae}. SDK might be incompatible or response structure unexpected.", exc_info=True);
636
  return None, f"Sorry, there was an issue processing the response from the AI service ({model_name})."
637
  except Exception as e:
638
  logger.error(f"[Gemini {model_name}] Unexpected error during Gemini API call: {e}", exc_info=True);
 
 
639
  error_msg = f"Sorry, an unexpected error occurred while using the AI service ({model_name})."
 
640
  return None, error_msg
641
 
642
+ # <<< Uses the specific OPENROUTER_DEEPSEEK_MODEL constant >>>
 
643
  async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
644
+ """Internal function to call OpenRouter API (Final Fallback - DeepSeek V3 Free). Returns (summary, error_message)."""
645
+ global OPENROUTER_API_KEY, OPENROUTER_DEEPSEEK_MODEL, _openrouter_fallback_enabled
646
  if not _openrouter_fallback_enabled:
647
  logger.error("[OpenRouter Fallback] Called but is disabled.");
648
  return None, "Error: Final fallback AI service (OpenRouter) not configured/available."
649
+ logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_DEEPSEEK_MODEL}. Input length: {len(text)}")
650
 
651
  prompt = PROMPT_PARAGRAPH if summary_type == "paragraph" else PROMPT_POINTS
652
+ MAX_INPUT_LENGTH_OR = 100000 # DeepSeek V3 has 131k context, 100k chars is safe
 
 
653
  if len(text) > MAX_INPUT_LENGTH_OR:
654
+ logger.warning(f"[OpenRouter Fallback] Input length ({len(text)}) exceeds estimated limit ({MAX_INPUT_LENGTH_OR}) for {OPENROUTER_DEEPSEEK_MODEL}. Truncating.");
655
  text = text[:MAX_INPUT_LENGTH_OR] + "... (Content truncated)"
656
  full_prompt = f"{prompt}\n\n{text}"
657
 
658
+ # Use the direct httpx call as before, ensuring the correct model name is in the payload
659
+ headers = {
660
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
661
+ "Content-Type": "application/json",
662
+ "HTTP-Referer": os.environ.get("YOUR_SITE_URL", "https://github.com/your-repo"), # Optional header
663
+ "X-Title": os.environ.get("YOUR_SITE_NAME", "TelegramSummariserBot") # Optional header
664
+ }
665
+ payload = {
666
+ "model": OPENROUTER_DEEPSEEK_MODEL, # <<< Use specific DeepSeek model name
667
+ "messages": [{"role": "user", "content": full_prompt}],
668
+ }
669
  openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
670
+ api_timeouts = httpx.Timeout(connect=10.0, read=60.0, write=10.0, pool=60.0)
671
  response = None
672
 
673
  try:
674
  async with httpx.AsyncClient(timeout=api_timeouts) as client:
675
+ logger.info(f"[OpenRouter Fallback] Sending request to OpenRouter ({OPENROUTER_DEEPSEEK_MODEL}) with read timeout {api_timeouts.read}s...")
676
  response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
677
  logger.info(f"[OpenRouter Fallback] Received response from OpenRouter. Status code: {response.status_code}")
678
 
 
684
  if message and isinstance(message, dict):
685
  summary = message.get("content")
686
  if summary: logger.info(f"[OpenRouter Fallback] Success via OpenRouter. Finish: {finish_reason}. Output len: {len(summary)}"); return summary.strip(), None
687
+ else: logger.warning(f"[OpenRouter Fallback] OpenRouter success but content empty. Finish: {finish_reason}. Resp: {data}"); return None, f"Sorry, the fallback AI model ({OPENROUTER_DEEPSEEK_MODEL}) returned an empty summary (Finish: {finish_reason})."
688
  else: logger.error(f"[OpenRouter Fallback] Unexpected message structure: {message}. Finish: {finish_reason}. Full: {data}"); return None, "Sorry, could not parse fallback AI response (message format)."
689
  else:
690
  error_details = data.get("error", {}); logger.error(f"[OpenRouter Fallback] Unexpected choices structure or error in response: {data.get('choices')}. Error: {error_details}. Full: {data}");
691
  return None, f"Sorry, could not parse fallback AI response (choices structure or error: {error_details.get('message', 'Unknown')})."
692
  except json.JSONDecodeError: logger.error(f"[OpenRouter Fallback] Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return None, "Sorry, failed to understand fallback AI response."
693
  except Exception as e: logger.error(f"[OpenRouter Fallback] Error processing OpenRouter success response: {e}", exc_info=True); return None, "Sorry, error processing fallback AI response."
694
+ elif response.status_code == 401: logger.error("[OpenRouter Fallback] API key invalid (401)."); return None, f"Error: Fallback AI model ({OPENROUTER_DEEPSEEK_MODEL}) configuration key is invalid."
695
+ elif response.status_code == 402: logger.error("[OpenRouter Fallback] Payment Required/Quota Exceeded (402)."); return None, f"Sorry, fallback AI service ({OPENROUTER_DEEPSEEK_MODEL}) quota/limit issue."
696
+ elif response.status_code == 429: logger.warning("[OpenRouter Fallback] Rate Limit Exceeded (429)."); return None, f"Sorry, fallback AI model ({OPENROUTER_DEEPSEEK_MODEL}) is busy. Try again."
697
+ elif response.status_code == 500: logger.error(f"[OpenRouter Fallback] Internal Server Error (500). Resp:{response.text[:500]}"); return None, f"Sorry, fallback AI service ({OPENROUTER_DEEPSEEK_MODEL}) had an internal error."
698
  else:
699
  error_info = "";
700
  try: error_info = response.json().get("error", {}).get("message", "")
701
  except Exception: pass
702
  logger.error(f"[OpenRouter Fallback] Unexpected status {response.status_code}. Error: '{error_info}' Resp:{response.text[:500]}");
703
+ return None, f"Sorry, fallback AI service ({OPENROUTER_DEEPSEEK_MODEL}) returned unexpected status ({response.status_code})."
704
 
705
+ except httpx.TimeoutException as e: logger.error(f"[OpenRouter Fallback] Timeout error ({type(e)}) connecting/reading from OpenRouter API: {e}"); return None, f"Sorry, the fallback AI service ({OPENROUTER_DEEPSEEK_MODEL}) timed out."
706
  except httpx.RequestError as e: logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}"); return None, "Sorry, there was an error connecting to the fallback AI model service."
707
  except Exception as e: logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter call: {e}", exc_info=True); return None, "Sorry, an unexpected error occurred while using the fallback AI service."
708
 
709
 
 
710
async def generate_summary(text: str, summary_type: str) -> str:
    """
    Generate a summary of *text* using the specific model hierarchy (April 2025):
      1. Groq (Llama 4 Scout)
      2. Gemini (2.5 Pro Exp)
      3. Gemini (2.0 Flash)
      4. OpenRouter (DeepSeek V3 Free)

    Args:
        text: The content to summarise (each backend applies its own truncation).
        summary_type: "paragraph" or "points"; forwarded to the backend calls.

    Returns:
        The summary text from the first model that succeeds, or a comprehensive
        error message listing every model's failure reason if all attempts fail.
    """
    global _groq_enabled, _gemini_api_enabled, _openrouter_fallback_enabled
    global GROQ_LLAMA4_MODEL, GEMINI_PRO_EXP_MODEL, GEMINI_FLASH_MODEL, OPENROUTER_DEEPSEEK_MODEL

    logger.info("[Summary Generation] Starting process with specific April 2025 model hierarchy.")
    summary: Optional[str] = None
    # One slot per model; a non-None value records why that attempt failed
    # (or that the service was disabled) for the final error report.
    errors: Dict[str, Optional[str]] = {
        "Llama4Scout": None,
        "GeminiProExp": None,
        "GeminiFlash": None,
        "DeepSeekV3": None,
    }

    # --- Attempt 1: Groq (Llama 4 Scout) ---
    if _groq_enabled:
        logger.info(f"[Summary Generation] Attempting 1: Groq ({GROQ_LLAMA4_MODEL})")
        summary, errors["Llama4Scout"] = await _call_groq(text, summary_type)
        if summary:
            logger.info(f"[Summary Generation] Success with Groq ({GROQ_LLAMA4_MODEL}).")
            return summary
        logger.warning(f"[Summary Generation] Groq Llama 4 Scout failed. Error: {errors['Llama4Scout']}. Proceeding to Gemini 2.5 Pro Exp.")
    else:
        logger.warning("[Summary Generation] Groq (Llama 4 Scout) is disabled or unavailable. Skipping.")
        errors["Llama4Scout"] = "Service disabled/unavailable."

    # --- Attempts 2 & 3: Gemini 2.5 Pro Exp, then Gemini 2.0 Flash ---
    # Both Gemini attempts share one availability gate, so Flash is nested
    # here instead of re-checking _gemini_api_enabled (the original's
    # `elif errors["GeminiFlash"] is None` branch was unreachable).
    if _gemini_api_enabled:
        logger.info(f"[Summary Generation] Attempting 2: Gemini ({GEMINI_PRO_EXP_MODEL})")
        summary, errors["GeminiProExp"] = await _call_gemini(text, summary_type, GEMINI_PRO_EXP_MODEL)
        if summary:
            logger.info(f"[Summary Generation] Success with Gemini ({GEMINI_PRO_EXP_MODEL}).")
            return summary
        logger.warning(f"[Summary Generation] Gemini 2.5 Pro Exp failed. Error: {errors['GeminiProExp']}. Proceeding to Gemini 2.0 Flash.")

        logger.info(f"[Summary Generation] Attempting 3: Gemini ({GEMINI_FLASH_MODEL})")
        summary, errors["GeminiFlash"] = await _call_gemini(text, summary_type, GEMINI_FLASH_MODEL)
        if summary:
            logger.info(f"[Summary Generation] Success with Gemini ({GEMINI_FLASH_MODEL}).")
            return summary
        logger.warning(f"[Summary Generation] Gemini 2.0 Flash failed. Error: {errors['GeminiFlash']}. Proceeding to OpenRouter DeepSeek V3.")
    else:
        logger.warning("[Summary Generation] Gemini API is disabled or unavailable. Skipping Gemini 2.5 Pro Exp & 2.0 Flash.")
        errors["GeminiProExp"] = "Service disabled/unavailable."
        errors["GeminiFlash"] = "Service disabled/unavailable."

    # --- Attempt 4: OpenRouter (DeepSeek V3 Free - Final Fallback) ---
    if _openrouter_fallback_enabled:
        logger.info(f"[Summary Generation] Attempting 4: OpenRouter ({OPENROUTER_DEEPSEEK_MODEL})")
        summary, errors["DeepSeekV3"] = await _call_openrouter(text, summary_type)
        if summary:
            logger.info(f"[Summary Generation] Success with OpenRouter ({OPENROUTER_DEEPSEEK_MODEL}).")
            return summary
        logger.error(f"[Summary Generation] OpenRouter DeepSeek V3 (Final Fallback) also failed. Error: {errors['DeepSeekV3']}")
    else:
        logger.error("[Summary Generation] OpenRouter fallback (DeepSeek V3) is disabled or unavailable. Cannot proceed.")
        errors["DeepSeekV3"] = "Service disabled/unavailable."

    # --- All Attempts Failed ---
    logger.error("[Summary Generation] All summarization models failed.")
    error_details = "\n".join([f"- {model}: {err}" for model, err in errors.items() if err])
    return f"Sorry, I couldn't generate a summary after trying all available AI models.\nDetails:\n{error_details}"
788
 
789
 
790
  # --- Main Processing Logic ---
791
+ # (process_summary_task remains UNCHANGED in its core logic, it correctly calls the updated generate_summary.
792
+ # Omitted here for brevity, but MUST be included in the final file.)
 
793
  # --- START OMITTED process_summary_task ---
794
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
795
  """Handles the entire process: fetching content (with ALL fallbacks) and summarizing."""
 
828
  else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
829
  if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
830
  else:
831
+ global URLTOTEXT_API_KEY, RAPIDAPI_KEY, APIFY_API_TOKEN, _urltotext_key_exists, _rapidapi_key_exists, _apify_token_exists
832
 
833
  logger.info(f"[Task {task_id}] Trying Web Scrape Method 1 (Direct Fetch + BS4)..."); content = await get_website_content(url)
834
  if not content:
835
  logger.warning(f"[Task {task_id}] Method 1 failed. Trying Method 2 (urltotext.com)...")
836
+ if _urltotext_key_exists: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing'); content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
837
  else: logger.warning("[Task {task_id}] Method 2 (urltotext.com) API key unavailable. Skipping.")
838
  if not content:
839
  logger.warning(f"[Task {task_id}] Method 2 failed. Trying Method 3 (Scraper's Proxy)...")
 
863
  except Exception as edit_e: logger.warning(f"[Task {task_id}] Failed to edit status message before summary: {edit_e}")
864
 
865
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
866
+ final_summary = await generate_summary(content, summary_type) # Calls the updated function
 
867
 
868
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
869
  user_feedback_message = final_summary
 
908
 
909
  # --- Telegram Handlers ---
910
  # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler
911
+ # remain UNCHANGED. Omitted here for brevity, but include in final file.)
 
912
  # --- START OMITTED TELEGRAM HANDLERS ---
913
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
914
  user = update.effective_user; mention = user.mention_html()
 
925
  "2. I'll ask how you want it summarised (paragraph or points).\n"
926
  "3. Click the button for your choice.\n"
927
  "4. Wait while I fetch the content and generate the summary!\n\n"
928
+ "βš™οΈ I try multiple methods to get content, especially for tricky websites or YouTube videos without standard transcripts. I then use a sequence of AI models (Llama 4 Scout, Gemini 2.5 Pro, Gemini 2.0 Flash, DeepSeek V3) to summarise.\n\n" # Updated help text
929
  "**Commands:**\n"
930
  "`/start` - Display the welcome message\n"
931
  "`/help` - Show this help message" )
 
983
 
984
  context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")
985
 
986
+ # Check essential configurations - requires at least ONE summarizer to be enabled
987
  global TELEGRAM_TOKEN, _groq_enabled, _gemini_api_enabled, _openrouter_fallback_enabled
988
  if not TELEGRAM_TOKEN:
989
  logger.critical("TELEGRAM_TOKEN missing in callback!")
 
995
  try: await query.edit_message_text(text="❌ AI configuration error: No summarization models available.")
996
  except Exception: pass
997
  return
998
+ # Log warnings if specific models/APIs are unavailable but don't stop the process if fallbacks exist
999
+ if not _groq_enabled: logger.warning("Primary AI (Groq Llama 4 Scout) is unavailable.")
1000
+ if not _gemini_api_enabled: logger.warning("Gemini API is unavailable (skipping 2.5 Pro Exp & 2.0 Flash).")
1001
+ if not _openrouter_fallback_enabled: logger.warning("Final Fallback AI (OpenRouter DeepSeek V3) is unavailable.")
 
 
 
 
 
 
1002
 
1003
  logger.info(f"Scheduling background task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit}")
1004
  asyncio.create_task(
 
1020
 
1021
 
1022
  # --- Application Setup & Web Framework ---
1023
+ # (setup_bot_config, lifespan, telegram_webhook, app definition
1024
+ # remain UNCHANGED. health_check is modified below.
1025
  # Omitted here for brevity, include in final file.)
1026
+ # --- START OMITTED APP SETUP/WEB FRAMEWORK (excluding health_check) ---
1027
  async def setup_bot_config() -> Application:
1028
  logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
1029
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
 
1093
  else: logger.info("PTB application was not fully initialized or failed during startup. No shutdown actions needed.")
1094
  logger.info("ASGI Lifespan: Shutdown complete.")
1095
 
1096
async def telegram_webhook(request: Request) -> Response:
    """Handle incoming updates from Telegram delivered to the webhook endpoint.

    Responses:
        503 - bot application not initialized or not running yet.
        403 - secret token header missing/incorrect (when WEBHOOK_SECRET set).
        400 - request body is not valid JSON.
        200 - update accepted; also returned on processing errors so Telegram
              does not endlessly redeliver a poison update.
    """
    global WEBHOOK_SECRET
    if not ptb_app:
        logger.error("Webhook received but PTB application not initialized.")
        return PlainTextResponse('Bot not initialized', status_code=503)
    if not ptb_app.running:
        logger.warning("Webhook received but PTB application not running.")
        return PlainTextResponse('Bot not running, cannot process update', status_code=503)
    if WEBHOOK_SECRET:
        import hmac  # local import: only needed when a webhook secret is configured
        token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
        # Constant-time comparison so the secret can't be probed via timing;
        # a missing header compares as the empty string and is rejected.
        if not hmac.compare_digest(token_header or "", WEBHOOK_SECRET):
            logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'")
            return Response(content="Invalid secret token", status_code=403)
    try:
        update_data = await request.json()
        update = Update.de_json(data=update_data, bot=ptb_app.bot)
        logger.debug(f"Processing update_id: {update.update_id} via webhook")
        await ptb_app.process_update(update)
        return Response(status_code=200)
    except json.JSONDecodeError:
        logger.error("Webhook received invalid JSON.")
        return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
    except Exception as e:
        # Deliberately ACK with 200: returning an error would make Telegram
        # retry the same broken update forever.
        logger.error(f"Error processing webhook update: {e}", exc_info=True)
        return Response(status_code=200)
1110
+
1111
# --- Starlette App Definition ---
# `health_check` is defined further down this module, so it cannot be passed
# directly as an endpoint here: this block executes at import time, before the
# `def health_check` statement runs, and referencing the name would raise
# NameError. The thin forwarder below defers the lookup until the first
# request arrives, by which point the module is fully loaded.
async def _health_check_endpoint(request: Request) -> PlainTextResponse:
    """Forward to health_check (defined later in this module)."""
    return await health_check(request)

app = Starlette(
    debug=False,
    lifespan=lifespan,
    routes=[
        Route("/", endpoint=_health_check_endpoint, methods=["GET"]),
        Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
    ],
)
logger.info("Starlette ASGI application created with health check and webhook routes.")
1119
+ # --- END OMITTED APP SETUP/WEB FRAMEWORK ---
1120
+
1121
+ # <<< CHANGE: Updated health check response for specific models >>>
1122
  async def health_check(request: Request) -> PlainTextResponse:
1123
  """Simple health check endpoint."""
1124
+ global GROQ_LLAMA4_MODEL, GEMINI_PRO_EXP_MODEL, GEMINI_FLASH_MODEL, OPENROUTER_DEEPSEEK_MODEL, APIFY_ACTOR_ID
 
1125
  global _groq_enabled, _gemini_api_enabled, _openrouter_fallback_enabled
1126
  global _apify_token_exists, _urltotext_key_exists, _rapidapi_key_exists, SUPADATA_API_KEY
1127
 
 
1139
  bot_status = f"Error checking status: {type(e).__name__}"; logger.warning(f"Health check: Error getting bot info: {e}")
1140
  else: bot_status = "Not Initialized"; bot_username = "N/A"
1141
 
1142
+ # <<< Update response string with specific model names >>>
1143
  return PlainTextResponse(
1144
  f"TG Bot Summariser - Status: {bot_status} ({bot_username})\n"
1145
  f"---\n"
1146
+ f"Summarizer Priority (April 2025 - Specific):\n"
1147
+ f"1. Groq API: {GROQ_LLAMA4_MODEL if _groq_enabled else 'DISABLED'}\n"
1148
+ f"2. Gemini API: {GEMINI_PRO_EXP_MODEL if _gemini_api_enabled else 'DISABLED'}\n"
1149
+ f"3. Gemini API: {GEMINI_FLASH_MODEL if _gemini_api_enabled else 'DISABLED'}\n"
1150
+ f"4. OpenRouter API: {OPENROUTER_DEEPSEEK_MODEL if _openrouter_fallback_enabled else 'DISABLED'}\n"
1151
  f"---\n"
1152
  f"Content Fetching Status:\n"
1153
  f"YT Fallback 1 (Supadata): {'Enabled' if SUPADATA_API_KEY else 'Disabled'}\n"
 
1158
  f"Web Scrape 5/6 (Apify Actors): {'Enabled' if _apify_token_exists else 'Disabled'}"
1159
  )
1160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1161
  # --- Development Server (if run directly) ---
1162
  if __name__ == '__main__':
1163
  import uvicorn