fmab777 committed on
Commit
38deed1
·
verified ·
1 Parent(s): b51c818

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +52 -32
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Correcting SyntaxError at line 238 & Updating Gemini Model)
2
  import os
3
  import re
4
  import logging
@@ -62,7 +62,6 @@ logging.getLogger("urllib3").setLevel(logging.INFO)
62
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
63
  logging.getLogger('uvicorn').setLevel(logging.INFO)
64
  logging.getLogger('starlette').setLevel(logging.INFO)
65
- # Reduce Gemini log noise if needed
66
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
67
  logger = logging.getLogger(__name__)
68
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
@@ -88,16 +87,17 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
88
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
89
  GEMINI_API_KEY = get_secret('GEMINI_API_KEY')
90
 
91
- # --- Model Configuration ---
92
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
93
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
94
- # *** FIX: Update default Gemini model ***
95
  GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
96
 
97
- # --- Key Checks ---
98
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
99
  if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
100
- if _gemini_available and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback summarization via Gemini disabled.")
 
 
 
101
 
102
  if not URLTOTEXT_API_KEY: pass
103
  if not SUPADATA_API_KEY: pass
@@ -106,21 +106,15 @@ if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found.
106
 
107
  logger.info("Secret loading and configuration check finished.")
108
  logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
109
- _gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY) # Define after checking key
110
  if _gemini_fallback_enabled: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
111
- else: logger.info("Gemini Fallback: Disabled (library or API key missing)")
112
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
113
  _apify_token_exists = bool(APIFY_API_TOKEN)
114
 
115
 
116
- # --- Configure Gemini Client ---
117
  if _gemini_fallback_enabled:
118
- try:
119
- genai.configure(api_key=GEMINI_API_KEY)
120
- logger.info("Google GenAI client configured successfully.")
121
- except Exception as e:
122
- logger.error(f"Failed to configure Google GenAI client: {e}")
123
- _gemini_fallback_enabled = False
124
 
125
  # --- Retry Decorator ---
126
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
@@ -145,6 +139,7 @@ def extract_youtube_id(url):
145
 
146
 
147
  # --- Content Fetching Functions ---
 
148
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
149
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
150
  try:
@@ -235,8 +230,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
235
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
236
  elif response.status_code == 404:
237
  error_info = "";
238
- try: # Correctly indented try/except
239
- error_info = response.json().get("error", {}).get("message", "")
240
  except Exception: pass
241
  logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
242
  return None
@@ -247,7 +241,6 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
247
  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
248
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
249
 
250
-
251
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
252
  global SUPADATA_API_KEY, APIFY_API_TOKEN
253
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
@@ -341,23 +334,54 @@ async def generate_summary_gemini(text: str, summary_type: str) -> str:
341
  MAX_INPUT_LENGTH = 1000000
342
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
343
  full_prompt = f"{prompt}\n\n{text}"
344
- safety_settings = { HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
 
346
  try:
347
  logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
348
  model = genai.GenerativeModel(GEMINI_MODEL)
349
  logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
350
- response = await model.generate_content_async( full_prompt, safety_settings=safety_settings )
 
351
  logger.info("[Gemini Fallback] Received response from Gemini.")
352
 
353
- if response.prompt_feedback.block_reason: logger.error(f"[Gemini Fallback] Request blocked. Reason: {response.prompt_feedback.block_reason}"); return f"Sorry, fallback AI blocked summary due to safety filters ({response.prompt_feedback.block_reason})."
354
- summary = response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  if summary: logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}"); return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
356
- else: logger.warning(f"[Gemini Fallback] Gemini returned empty summary. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}"); return "Sorry, the fallback AI model returned an empty summary."
357
  except Exception as e: logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True); return "Sorry, an unexpected error occurred while using the fallback AI service."
358
 
 
359
  async def generate_summary(text: str, summary_type: str) -> str:
360
- """Generates summary using OpenRouter (Primary) with Gemini fallback on ReadTimeout."""
361
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
362
  logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
363
  if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."
@@ -368,7 +392,8 @@ async def generate_summary(text: str, summary_type: str) -> str:
368
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Primary Summary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
369
  full_prompt = f"{prompt}\n\n{text}"
370
  headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
371
- api_timeouts = httpx.Timeout(15.0, read=180.0, write=15.0, pool=60.0); response = None
 
372
 
373
  try:
374
  async with httpx.AsyncClient(timeout=api_timeouts) as client:
@@ -396,12 +421,7 @@ async def generate_summary(text: str, summary_type: str) -> str:
396
  elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
397
  elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
398
  else:
399
- # *** FIX: Correct Indentation for error info extraction ***
400
- error_info = ""
401
- try:
402
- error_info = response.json().get("error", {}).get("message", "")
403
- except Exception:
404
- pass
405
  logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
406
  return f"Sorry, primary AI service returned unexpected status ({response.status_code})."
407
 
@@ -457,7 +477,7 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
457
  if content:
458
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
459
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
460
- final_summary = await generate_summary(content, summary_type)
461
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
462
  else:
463
  max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
 
1
+ # main.py (Updating Gemini Model and Safety Settings)
2
  import os
3
  import re
4
  import logging
 
62
  logging.getLogger('gunicorn.error').setLevel(logging.INFO)
63
  logging.getLogger('uvicorn').setLevel(logging.INFO)
64
  logging.getLogger('starlette').setLevel(logging.INFO)
 
65
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
66
  logger = logging.getLogger(__name__)
67
  logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
 
87
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
88
  GEMINI_API_KEY = get_secret('GEMINI_API_KEY')
89
 
 
90
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
91
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
92
+ # *** FIX: Update default Gemini model to gemini-2.0-flash ***
93
  GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
94
 
 
95
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
96
  if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
97
+ _gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY)
98
+ if _gemini_fallback_enabled and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY found in env but value seems empty. Fallback disabled.")
99
+ elif not _gemini_fallback_enabled and _gemini_available : logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback disabled.")
100
+
101
 
102
  if not URLTOTEXT_API_KEY: pass
103
  if not SUPADATA_API_KEY: pass
 
106
 
107
  logger.info("Secret loading and configuration check finished.")
108
  logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
 
109
  if _gemini_fallback_enabled: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
110
+ else: logger.info("Gemini Fallback: Disabled")
111
  logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
112
  _apify_token_exists = bool(APIFY_API_TOKEN)
113
 
114
 
 
115
  if _gemini_fallback_enabled:
116
+ try: genai.configure(api_key=GEMINI_API_KEY); logger.info("Google GenAI client configured successfully.")
117
+ except Exception as e: logger.error(f"Failed to configure Google GenAI client: {e}"); _gemini_fallback_enabled = False
 
 
 
 
118
 
119
  # --- Retry Decorator ---
120
  @retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
 
139
 
140
 
141
  # --- Content Fetching Functions ---
142
+ # (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
143
  async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
144
  headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
145
  try:
 
230
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
231
  elif response.status_code == 404:
232
  error_info = "";
233
+ try: error_info = response.json().get("error", {}).get("message", "")
 
234
  except Exception: pass
235
  logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
236
  return None
 
241
  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
242
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
243
 
 
244
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
245
  global SUPADATA_API_KEY, APIFY_API_TOKEN
246
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
 
334
  MAX_INPUT_LENGTH = 1000000
335
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
336
  full_prompt = f"{prompt}\n\n{text}"
337
+
338
+ # *** FIX: Set all safety settings to BLOCK_NONE ***
339
+ safety_settings = {
340
+ HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
341
+ HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
342
+ HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
343
+ HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
344
+ # Note: CIVIC_INTEGRITY might not be available/needed for all models,
345
+ # but setting it won't hurt if the HarmCategory exists.
346
+ # If it causes an error later, remove this line.
347
+ getattr(HarmCategory, 'HARM_CATEGORY_CIVIC_INTEGRITY', None): HarmBlockThreshold.BLOCK_NONE
348
+ }
349
+ # Filter out None keys in case CIVIC_INTEGRITY doesn't exist
350
+ safety_settings = {k: v for k, v in safety_settings.items() if k is not None}
351
+ logger.debug(f"[Gemini Fallback] Using safety settings: {safety_settings}")
352
+
353
 
354
  try:
355
  logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
356
  model = genai.GenerativeModel(GEMINI_MODEL)
357
  logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
358
+ request_options = {"timeout": 120}
359
+ response = await model.generate_content_async( full_prompt, safety_settings=safety_settings, request_options=request_options )
360
  logger.info("[Gemini Fallback] Received response from Gemini.")
361
 
362
+ # Checking block reason is still useful for debugging, though unlikely now
363
+ if response.prompt_feedback.block_reason: logger.warning(f"[Gemini Fallback] Request blocked unexpectedly. Reason: {response.prompt_feedback.block_reason}"); # Changed to warning
364
+ # Check candidates for safety blocks as well
365
+ for cand in response.candidates:
366
+ if cand.finish_reason == 'SAFETY':
367
+ logger.warning(f"[Gemini Fallback] Candidate blocked due to SAFETY. Ratings: {cand.safety_ratings}")
368
+
369
+
370
+ # Attempt to get text even if blocked (might be None)
371
+ try:
372
+ summary = response.text
373
+ except ValueError as e:
374
+ # Handle cases where accessing response.text raises ValueError (e.g., blocked content)
375
+ logger.warning(f"[Gemini Fallback] Error accessing response text (likely blocked content): {e}")
376
+ summary = None # Ensure summary is None
377
+
378
  if summary: logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}"); return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
379
+ else: logger.warning(f"[Gemini Fallback] Gemini returned empty summary or content was blocked. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}"); return "Sorry, the fallback AI model did not provide a summary (possibly due to content filters)."
380
  except Exception as e: logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True); return "Sorry, an unexpected error occurred while using the fallback AI service."
381
 
382
+
383
  async def generate_summary(text: str, summary_type: str) -> str:
384
+ """Generates summary using OpenRouter (Primary) with Gemini fallback on 10s ReadTimeout."""
385
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
386
  logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
387
  if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."
 
392
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Primary Summary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
393
  full_prompt = f"{prompt}\n\n{text}"
394
  headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
395
+ api_timeouts = httpx.Timeout(15.0, read=10.0, write=15.0, pool=60.0) # 10 second read timeout
396
+ response = None
397
 
398
  try:
399
  async with httpx.AsyncClient(timeout=api_timeouts) as client:
 
421
  elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
422
  elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
423
  else:
424
+ error_info = ""; try: error_info = response.json().get("error", {}).get("message", "") except Exception: pass
 
 
 
 
 
425
  logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
426
  return f"Sorry, primary AI service returned unexpected status ({response.status_code})."
427
 
 
477
  if content:
478
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
479
  await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
480
+ final_summary = await generate_summary(content, summary_type) # This now handles the fallback internally
481
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
482
  else:
483
  max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]