Update main.py

main.py CHANGED
@@ -1,4 +1,4 @@
-# main.py (
+# main.py (Correcting SyntaxError at line 238 & Updating Gemini Model)
 import os
 import re
 import logging
@@ -50,7 +50,7 @@ except ImportError:
 HarmCategory = None
 HarmBlockThreshold = None
 _gemini_available = False
-logger
+# logger will be defined later, log warning after logger setup


 # --- Logging Setup ---
@@ -66,6 +66,8 @@ logging.getLogger('starlette').setLevel(logging.INFO)
 if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
 logger = logging.getLogger(__name__)
 logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
+if not _gemini_available: logger.warning("google-generativeai library not found. Gemini fallback disabled.")
+

 # --- Global variable for PTB app ---
 ptb_app: Optional[Application] = None
@@ -84,18 +86,17 @@ URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
 SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
 APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
 WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
-GEMINI_API_KEY = get_secret('GEMINI_API_KEY')
+GEMINI_API_KEY = get_secret('GEMINI_API_KEY')

 # --- Model Configuration ---
 OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
 APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
-#
+# *** FIX: Update default Gemini model ***
 GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")

 # --- Key Checks ---
 if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
 if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
-# Gemini is a fallback, so only warn if key is missing and library is available
 if _gemini_available and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback summarization via Gemini disabled.")

 if not URLTOTEXT_API_KEY: pass
@@ -105,23 +106,22 @@ if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found.

 logger.info("Secret loading and configuration check finished.")
 logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
-
+_gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY) # Define after checking key
+if _gemini_fallback_enabled: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
 else: logger.info("Gemini Fallback: Disabled (library or API key missing)")
 logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
-
 _apify_token_exists = bool(APIFY_API_TOKEN)
-_gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY)

-
+
+# --- Configure Gemini Client ---
 if _gemini_fallback_enabled:
 try:
 genai.configure(api_key=GEMINI_API_KEY)
 logger.info("Google GenAI client configured successfully.")
 except Exception as e:
 logger.error(f"Failed to configure Google GenAI client: {e}")
-_gemini_fallback_enabled = False
+_gemini_fallback_enabled = False

-# (Retry Decorator, Helper Functions remain the same)
 # --- Retry Decorator ---
 @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
 async def retry_bot_operation(func, *args, **kwargs):
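Editor's note on the @retry decorator above: tenacity re-invokes the wrapped coroutine on the listed python-telegram-bot errors with exponential backoff (roughly 2 s, 4 s, 8 s, capped at 15 s) and, with reraise=True, surfaces the last error after the fourth attempt. A minimal standalone sketch of the same pattern, assuming the body simply awaits the passed-in bot call (the body itself is not shown in this diff):

import logging
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type, before_sleep_log
from telegram.error import BadRequest, NetworkError, RetryAfter, TimedOut

logger = logging.getLogger(__name__)

@retry(
    stop=stop_after_attempt(4),                          # give up after 4 attempts
    wait=wait_exponential(multiplier=1, min=2, max=15),  # backoff between attempts, capped at 15s
    retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)),
    before_sleep=before_sleep_log(logger, logging.WARNING),
    reraise=True,                                         # re-raise the final failure to the caller
)
async def retry_bot_operation(func, *args, **kwargs):
    # Assumed body: delegate to the bot coroutine, e.g. bot.send_chat_action(chat_id=..., action='typing')
    return await func(*args, **kwargs)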
@@ -145,7 +145,6 @@ def extract_youtube_id(url):


 # --- Content Fetching Functions ---
-# (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
 async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
 headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
 try:
@@ -235,8 +234,12 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
 elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
 elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
 elif response.status_code == 404:
-error_info = "";
-
+error_info = "";
+try: # Correctly indented try/except
+error_info = response.json().get("error", {}).get("message", "")
+except Exception: pass
+logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
+return None
 else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None

 except httpx.TimeoutException as e: logger.error(f"[Apify SyncItems] Timeout during API interaction for {video_url}: {e}"); return None
@@ -244,6 +247,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
 except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
 except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None

+
 async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
 global SUPADATA_API_KEY, APIFY_API_TOKEN
 if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
@@ -326,76 +330,38 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
 except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None

 # --- Summarization Functions ---
-
-# --- NEW: Gemini Summarization Function ---
 async def generate_summary_gemini(text: str, summary_type: str) -> str:
 """Generates summary using Google Gemini API (Fallback)."""
 global GEMINI_MODEL, _gemini_fallback_enabled
-if not _gemini_fallback_enabled:
-logger.error("[Gemini Fallback] Called but is disabled (no library or key).")
-return "Error: Fallback AI service not available."
-
+if not _gemini_fallback_enabled: logger.error("[Gemini Fallback] Called but is disabled."); return "Error: Fallback AI service not available."
 logger.info(f"[Gemini Fallback] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")

-# Reuse the same prompts as OpenRouter/DeepSeek for consistency
 if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "• Clear and simple language suitable for someone unfamiliar with the topic.\n" "• Uses British English spellings throughout.\n" "• Straightforward and understandable vocabulary; avoid complex terms.\n" "• Presented as ONE SINGLE PARAGRAPH.\n" "• No more than 85 words maximum; but does not have to be exactly 85.\n" "• Considers the entire text content equally.\n" "• Uses semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
 else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "• For each distinct topic or section identified in the text, create a heading.\n" "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "• Immediately following each heading, list the key points as a bulleted list.\n" "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "• The text within each bullet point should NOT contain any bold formatting.\n" "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "• Use British English spellings throughout.\n" "• Avoid overly complex or advanced vocabulary.\n" "• Keep bullet points concise.\n" "• Ensure the entire summary takes no more than two minutes to read.\n" "• Consider the entire text's content, not just the beginning or a few topics.\n" "• Use semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
-
-
-MAX_INPUT_LENGTH = 1000000 # Gemini Flash limit is ~1M tokens
-if len(text) > MAX_INPUT_LENGTH:
-logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds assumed limit ({MAX_INPUT_LENGTH}). Truncating.")
-text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
+MAX_INPUT_LENGTH = 1000000
+if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
 full_prompt = f"{prompt}\n\n{text}"
-
-# Configure safety settings - block potentially harmful content
-safety_settings = {
-HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
-HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
-HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
-HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
-}
+safety_settings = { HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, }

 try:
 logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
 model = genai.GenerativeModel(GEMINI_MODEL)
 logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
-
-response = await model.generate_content_async(
-full_prompt,
-safety_settings=safety_settings
-)
+response = await model.generate_content_async( full_prompt, safety_settings=safety_settings )
 logger.info("[Gemini Fallback] Received response from Gemini.")

-
-if response.prompt_feedback.block_reason:
-logger.error(f"[Gemini Fallback] Request blocked by Gemini. Reason: {response.prompt_feedback.block_reason}")
-return f"Sorry, the content could not be summarized by the fallback AI due to safety filters ({response.prompt_feedback.block_reason})."
-
+if response.prompt_feedback.block_reason: logger.error(f"[Gemini Fallback] Request blocked. Reason: {response.prompt_feedback.block_reason}"); return f"Sorry, fallback AI blocked summary due to safety filters ({response.prompt_feedback.block_reason})."
 summary = response.text
-if summary:
-
-
-summary = summary.replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
-return summary.strip()
-else:
-logger.warning(f"[Gemini Fallback] Gemini returned an empty summary. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}")
-return "Sorry, the fallback AI model returned an empty summary."
+if summary: logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}"); return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
+else: logger.warning(f"[Gemini Fallback] Gemini returned empty summary. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}"); return "Sorry, the fallback AI model returned an empty summary."
+except Exception as e: logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True); return "Sorry, an unexpected error occurred while using the fallback AI service."

-except Exception as e:
-# Catch potential API errors (e.g., google.api_core.exceptions) or others
-logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True)
-return "Sorry, an unexpected error occurred while using the fallback AI service."
-
-
-# --- MODIFIED: Primary Summarization Function (Adds Fallback Logic) ---
 async def generate_summary(text: str, summary_type: str) -> str:
 """Generates summary using OpenRouter (Primary) with Gemini fallback on ReadTimeout."""
 global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
 logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
 if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."

-# Prompts (same as before)
 if summary_type == "paragraph": prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST be:\n" "• Clear and simple language suitable for someone unfamiliar with the topic.\n" "• Uses British English spellings throughout.\n" "• Straightforward and understandable vocabulary; avoid complex terms.\n" "• Presented as ONE SINGLE PARAGRAPH.\n" "• No more than 85 words maximum; but does not have to be exactly 85.\n" "• Considers the entire text content equally.\n" "• Uses semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
 else: prompt = ("You are an AI model designed to provide concise summaries using British English spellings. Your output MUST strictly follow this Markdown format:\n\n" "• For each distinct topic or section identified in the text, create a heading.\n" "• Each heading MUST be enclosed in double asterisks for bolding (e.g., **Section Title**).\n" "• Immediately following each heading, list the key points as a bulleted list.\n" "• Each bullet point MUST start with a hyphen and a space (- ) on a new line.\n" "• The text within each bullet point should NOT contain any bold formatting.\n" "• Use clear, simple, and straightforward language suitable for someone unfamiliar with the topic.\n" "• Use British English spellings throughout.\n" "• Avoid overly complex or advanced vocabulary.\n" "• Keep bullet points concise.\n" "• Ensure the entire summary takes no more than two minutes to read.\n" "• Consider the entire text's content, not just the beginning or a few topics.\n" "• Use semicolons (;) instead of em dashes (— or –).\n\n" "Here is the text to summarise:")
 MAX_INPUT_LENGTH = 500000
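Editor's note: for reference, a minimal standalone sketch of the google-generativeai call pattern the new fallback relies on. The model name and API-key lookup below are illustrative only; the real function additionally escapes Markdown characters and maps blocked or empty responses to user-facing apology strings.

import os
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

genai.configure(api_key=os.environ["GEMINI_API_KEY"])  # illustrative key source

async def gemini_summary_sketch(full_prompt: str) -> str:
    # Block medium-and-above harmful content, mirroring the settings used in the diff.
    safety_settings = {
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    }
    model = genai.GenerativeModel("gemini-2.0-flash")  # illustrative model name
    response = await model.generate_content_async(full_prompt, safety_settings=safety_settings)
    if response.prompt_feedback.block_reason:   # request rejected by safety filters
        return f"Blocked: {response.prompt_feedback.block_reason}"
    return response.text.strip()                # empty-candidate handling omitted in this sketch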
@@ -405,7 +371,6 @@ async def generate_summary(text: str, summary_type: str) -> str:
 api_timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0); response = None

 try:
-# --- Attempt OpenRouter Request ---
 async with httpx.AsyncClient(timeout=api_timeouts) as client:
 logger.info(f"[Primary Summary] Sending request to OpenRouter ({OPENROUTER_MODEL}) with read timeout {api_timeouts.read}s...")
 try:
@@ -413,9 +378,8 @@ async def generate_summary(text: str, summary_type: str) -> str:
 if response: logger.info(f"[Primary Summary] Received response from OpenRouter. Status code: {response.status_code}")
 else: logger.error("[Primary Summary] No response from OpenRouter (unexpected)."); return "Sorry, primary AI service failed unexpectedly."

-# --- Process OpenRouter Response ---
 if response.status_code == 200:
-try:
+try:
 data = response.json()
 if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
 message = data["choices"][0].get("message")
@@ -427,38 +391,33 @@ async def generate_summary(text: str, summary_type: str) -> str:
 else: logger.error(f"[Primary Summary] Unexpected choices structure: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse primary AI response (choices)."
 except json.JSONDecodeError: logger.error(f"[Primary Summary] Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand primary AI response."
 except Exception as e: logger.error(f"[Primary Summary] Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing primary AI response."
-
-# --- Handle OpenRouter Error Status Codes ---
 elif response.status_code == 401: logger.error("[Primary Summary] OpenRouter API key invalid (401)."); return "Error: Primary AI model configuration key is invalid."
 elif response.status_code == 402: logger.error("[Primary Summary] OpenRouter Payment Required (402)."); return "Sorry, primary AI service limits/payment issue."
 elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
 elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
-else:
-
+else:
+# *** FIX: Correct Indentation for error info extraction ***
+error_info = ""
+try:
+error_info = response.json().get("error", {}).get("message", "")
+except Exception:
+pass
 logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
 return f"Sorry, primary AI service returned unexpected status ({response.status_code})."

-# --- Catch OpenRouter Read Timeout -> Trigger Fallback ---
 except httpx.ReadTimeout:
 logger.warning(f"[Primary Summary] Read Timeout ({api_timeouts.read}s) waiting for OpenRouter. Attempting Gemini fallback...")
-if _gemini_fallback_enabled:
-
-
-
-logger.error("[Fallback Attempt] Gemini fallback skipped (disabled or key missing).")
-return f"Sorry, the primary AI service timed out after {api_timeouts.read} seconds, and the fallback service is not available."
+if _gemini_fallback_enabled: return await generate_summary_gemini(text, summary_type)
+else: logger.error("[Fallback Attempt] Gemini fallback skipped (disabled or key missing)."); return f"Sorry, the primary AI service timed out after {api_timeouts.read} seconds, and the fallback service is not available."
-# --- Catch Other Timeouts (Connect, Write, Pool) ---
-except httpx.TimeoutException as e:
-logger.error(f"[Primary Summary] Timeout error ({type(e)}) connecting/writing to OpenRouter API: {e}")
-return "Sorry, the request to the primary AI model timed out. Please try again."
-
-# --- Catch Request Errors and Other Exceptions ---
+except httpx.TimeoutException as e: logger.error(f"[Primary Summary] Timeout error ({type(e)}) connecting/writing to OpenRouter API: {e}"); return "Sorry, the request to the primary AI model timed out. Please try again."
+
 except httpx.RequestError as e: logger.error(f"[Primary Summary] Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the primary AI model service."
 except Exception as e:
 logger.error(f"[Primary Summary] Unexpected error in generate_summary (Outer try): {e}", exc_info=True)
 if response: logger.error(f"--> Last OpenRouter response status before error: {response.status_code}")
 return "Sorry, an unexpected error occurred while trying to generate the summary."

+
 # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
 async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
 task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
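Editor's note: the fallback trigger above hinges on httpx's per-phase timeouts. The OpenRouter POST is given a 180-second read timeout, and only httpx.ReadTimeout (not connect, write, or pool timeouts) hands control to the Gemini path. A minimal sketch of that control flow, with a placeholder payload, an assumed fallback coroutine, and auth headers omitted:

import httpx

OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"  # assumed endpoint, not shown in the diff

async def summarize_with_fallback(payload: dict, fallback) -> str:
    # Short connect/write/pool timeouts, long read timeout, matching the values above.
    timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0)
    try:
        async with httpx.AsyncClient(timeout=timeouts) as client:
            resp = await client.post(OPENROUTER_URL, json=payload)
            resp.raise_for_status()
            return resp.json()["choices"][0]["message"]["content"]
    except httpx.ReadTimeout:
        # The primary model accepted the request but took too long to answer: use the fallback.
        return await fallback()
    except httpx.TimeoutException:
        # Other timeouts (connect/write/pool) are reported rather than falling back.
        return "Sorry, the request to the primary AI model timed out."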
@@ -498,7 +457,6 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
 if content:
 logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
 await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
-# Call the primary summary function (which now includes the fallback)
 final_summary = await generate_summary(content, summary_type)
 if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
 else:
@@ -569,13 +527,13 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEF

 context.user_data.pop('url_to_summarize', None); context.user_data.pop('original_message_id', None); logger.debug(f"Cleared URL context for user {user.id}")

-global TELEGRAM_TOKEN, OPENROUTER_API_KEY
+global TELEGRAM_TOKEN, OPENROUTER_API_KEY
 if not TELEGRAM_TOKEN:
 logger.critical("TG TOKEN missing!")
 try: await query.edit_message_text(text="❌ Bot config error.")
 except Exception: pass
 return
-if not OPENROUTER_API_KEY:
+if not OPENROUTER_API_KEY:
 logger.error("OpenRouter key missing!")
 try: await query.edit_message_text(text="❌ AI config error.")
 except Exception: pass
@@ -650,13 +608,7 @@ async def health_check(request: Request) -> PlainTextResponse:
 if ptb_app.running: bot_info = await ptb_app.bot.get_me(); bot_status = f"Running (@{bot_info.username})"
 else: bot_status = "Initialized/Not running"
 except Exception as e: bot_status = f"Error checking status: {e}"
-
-return PlainTextResponse(
-f"TG Bot Summarizer - Status: {bot_status}\n"
-f"Primary Model: {OPENROUTER_MODEL}\n"
-f"Fallback Model: {GEMINI_MODEL if _gemini_fallback_enabled else 'N/A (Disabled)'}\n"
-f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}"
-)
+return PlainTextResponse( f"TG Bot Summarizer - Status: {bot_status}\n" f"Primary Model: {OPENROUTER_MODEL}\n" f"Fallback Model: {GEMINI_MODEL if _gemini_fallback_enabled else 'N/A (Disabled)'}\n" f"Apify Actor: {APIFY_ACTOR_ID if _apify_token_exists else 'N/A (No Token)'}" )

 async def telegram_webhook(request: Request) -> Response:
 global WEBHOOK_SECRET
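Editor's note on the collapsed return in the final hunk: the four f-string fragments inside PlainTextResponse(...) are adjacent string literals, so Python joins them into the same multi-line body the previous multi-line version produced. A quick illustration:

status, model = "Running", "deepseek/deepseek-chat-v3-0324:free"
body = (
    f"TG Bot Summarizer - Status: {status}\n"
    f"Primary Model: {model}\n"
)
# Adjacent (f-)string literals concatenate, so this holds:
assert body == "TG Bot Summarizer - Status: Running\nPrimary Model: deepseek/deepseek-chat-v3-0324:free\n"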