Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -62,7 +62,6 @@ logging.getLogger("urllib3").setLevel(logging.INFO)
|
|
62 |
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
|
63 |
logging.getLogger('uvicorn').setLevel(logging.INFO)
|
64 |
logging.getLogger('starlette').setLevel(logging.INFO)
|
65 |
-
# Reduce Gemini log noise if needed
|
66 |
if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
|
67 |
logger = logging.getLogger(__name__)
|
68 |
logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
|
@@ -88,16 +87,17 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
|
88 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
89 |
GEMINI_API_KEY = get_secret('GEMINI_API_KEY')
|
90 |
|
91 |
-
# --- Model Configuration ---
|
92 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
93 |
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
|
94 |
-
# *** FIX: Update default Gemini model ***
|
95 |
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
|
96 |
|
97 |
-
# --- Key Checks ---
|
98 |
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
99 |
if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
|
100 |
-
|
|
|
|
|
|
|
101 |
|
102 |
if not URLTOTEXT_API_KEY: pass
|
103 |
if not SUPADATA_API_KEY: pass
|
@@ -106,21 +106,15 @@ if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found.
|
|
106 |
|
107 |
logger.info("Secret loading and configuration check finished.")
|
108 |
logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
|
109 |
-
_gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY) # Define after checking key
|
110 |
if _gemini_fallback_enabled: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
|
111 |
-
else: logger.info("Gemini Fallback: Disabled
|
112 |
logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
|
113 |
_apify_token_exists = bool(APIFY_API_TOKEN)
|
114 |
|
115 |
|
116 |
-
# --- Configure Gemini Client ---
|
117 |
if _gemini_fallback_enabled:
|
118 |
-
try:
|
119 |
-
|
120 |
-
logger.info("Google GenAI client configured successfully.")
|
121 |
-
except Exception as e:
|
122 |
-
logger.error(f"Failed to configure Google GenAI client: {e}")
|
123 |
-
_gemini_fallback_enabled = False
|
124 |
|
125 |
# --- Retry Decorator ---
|
126 |
@retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
|
@@ -145,6 +139,7 @@ def extract_youtube_id(url):
|
|
145 |
|
146 |
|
147 |
# --- Content Fetching Functions ---
|
|
|
148 |
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
|
149 |
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
|
150 |
try:
|
@@ -235,8 +230,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
|
|
235 |
elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
|
236 |
elif response.status_code == 404:
|
237 |
error_info = "";
|
238 |
-
try:
|
239 |
-
error_info = response.json().get("error", {}).get("message", "")
|
240 |
except Exception: pass
|
241 |
logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
|
242 |
return None
|
@@ -247,7 +241,6 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
|
|
247 |
except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
|
248 |
except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
|
249 |
|
250 |
-
|
251 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
252 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
253 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
@@ -341,23 +334,54 @@ async def generate_summary_gemini(text: str, summary_type: str) -> str:
|
|
341 |
MAX_INPUT_LENGTH = 1000000
|
342 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
|
343 |
full_prompt = f"{prompt}\n\n{text}"
|
344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
|
346 |
try:
|
347 |
logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
|
348 |
model = genai.GenerativeModel(GEMINI_MODEL)
|
349 |
logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
|
350 |
-
|
|
|
351 |
logger.info("[Gemini Fallback] Received response from Gemini.")
|
352 |
|
353 |
-
|
354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
if summary: logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}"); return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
|
356 |
-
else: logger.warning(f"[Gemini Fallback] Gemini returned empty summary. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}"); return "Sorry, the fallback AI model
|
357 |
except Exception as e: logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True); return "Sorry, an unexpected error occurred while using the fallback AI service."
|
358 |
|
|
|
359 |
async def generate_summary(text: str, summary_type: str) -> str:
|
360 |
-
"""Generates summary using OpenRouter (Primary) with Gemini fallback on ReadTimeout."""
|
361 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
|
362 |
logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
363 |
if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."
|
@@ -368,7 +392,8 @@ async def generate_summary(text: str, summary_type: str) -> str:
|
|
368 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Primary Summary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
|
369 |
full_prompt = f"{prompt}\n\n{text}"
|
370 |
headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
|
371 |
-
api_timeouts = httpx.Timeout(15.0, read=
|
|
|
372 |
|
373 |
try:
|
374 |
async with httpx.AsyncClient(timeout=api_timeouts) as client:
|
@@ -396,12 +421,7 @@ async def generate_summary(text: str, summary_type: str) -> str:
|
|
396 |
elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
|
397 |
elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
|
398 |
else:
|
399 |
-
|
400 |
-
error_info = ""
|
401 |
-
try:
|
402 |
-
error_info = response.json().get("error", {}).get("message", "")
|
403 |
-
except Exception:
|
404 |
-
pass
|
405 |
logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
|
406 |
return f"Sorry, primary AI service returned unexpected status ({response.status_code})."
|
407 |
|
@@ -457,7 +477,7 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit:
|
|
457 |
if content:
|
458 |
logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
|
459 |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
460 |
-
final_summary = await generate_summary(content, summary_type)
|
461 |
if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
|
462 |
else:
|
463 |
max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
|
|
|
1 |
+
# main.py (Updating Gemini Model and Safety Settings)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
62 |
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
|
63 |
logging.getLogger('uvicorn').setLevel(logging.INFO)
|
64 |
logging.getLogger('starlette').setLevel(logging.INFO)
|
|
|
65 |
if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
|
66 |
logger = logging.getLogger(__name__)
|
67 |
logger.info(f"Logging configured. Using BS4 parser: {DEFAULT_PARSER}")
|
|
|
87 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
88 |
GEMINI_API_KEY = get_secret('GEMINI_API_KEY')
|
89 |
|
|
|
90 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
91 |
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
|
92 |
+
# *** FIX: Update default Gemini model to gemini-2.0-flash ***
|
93 |
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
|
94 |
|
|
|
95 |
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
96 |
if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Primary summarization will fail.")
|
97 |
+
# Gemini fallback is usable only when the SDK imported successfully AND a non-empty key is set.
_gemini_fallback_enabled = _gemini_available and bool(GEMINI_API_KEY)
# The previous "key found but empty" branch tested `_gemini_fallback_enabled and not GEMINI_API_KEY`,
# which can never be true (the flag already implies a truthy key). The only reachable warning is
# "SDK available but no key", so that is the single condition checked here.
if _gemini_available and not GEMINI_API_KEY: logger.warning("⚠️ WARNING: GEMINI_API_KEY not found. Fallback disabled.")
|
100 |
+
|
101 |
|
102 |
if not URLTOTEXT_API_KEY: pass
|
103 |
if not SUPADATA_API_KEY: pass
|
|
|
106 |
|
107 |
logger.info("Secret loading and configuration check finished.")
|
108 |
logger.info(f"Using OpenRouter Model (Primary): {OPENROUTER_MODEL}")
|
|
|
109 |
if _gemini_fallback_enabled: logger.info(f"Using Gemini Model (Fallback): {GEMINI_MODEL}")
|
110 |
+
else: logger.info("Gemini Fallback: Disabled")
|
111 |
logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
|
112 |
_apify_token_exists = bool(APIFY_API_TOKEN)
|
113 |
|
114 |
|
|
|
115 |
if _gemini_fallback_enabled:
|
116 |
+
try: genai.configure(api_key=GEMINI_API_KEY); logger.info("Google GenAI client configured successfully.")
|
117 |
+
except Exception as e: logger.error(f"Failed to configure Google GenAI client: {e}"); _gemini_fallback_enabled = False
|
|
|
|
|
|
|
|
|
118 |
|
119 |
# --- Retry Decorator ---
|
120 |
@retry( stop=stop_after_attempt(4), wait=wait_exponential(multiplier=1, min=2, max=15), retry=retry_if_exception_type((NetworkError, RetryAfter, TimedOut, BadRequest)), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True )
|
|
|
139 |
|
140 |
|
141 |
# --- Content Fetching Functions ---
|
142 |
+
# (fetch_url_content_for_scrape, get_transcript_via_supadata, get_transcript_via_apify, get_youtube_transcript, get_website_content, get_website_content_via_api remain the same as previous version)
|
143 |
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
|
144 |
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
|
145 |
try:
|
|
|
230 |
elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
|
231 |
elif response.status_code == 404:
|
232 |
error_info = "";
|
233 |
+
try: error_info = response.json().get("error", {}).get("message", "")
|
|
|
234 |
except Exception: pass
|
235 |
logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}");
|
236 |
return None
|
|
|
241 |
except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
|
242 |
except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
|
243 |
|
|
|
244 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
245 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
246 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
|
|
334 |
MAX_INPUT_LENGTH = 1000000
|
335 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Gemini Fallback] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
|
336 |
full_prompt = f"{prompt}\n\n{text}"
|
337 |
+
|
338 |
+
# *** FIX: Set all safety settings to BLOCK_NONE ***
|
339 |
+
safety_settings = {
|
340 |
+
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
341 |
+
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
|
342 |
+
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
|
343 |
+
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
344 |
+
# Note: CIVIC_INTEGRITY might not be available/needed for all models,
|
345 |
+
# but setting it won't hurt if the HarmCategory exists.
|
346 |
+
# If it causes an error later, remove this line.
|
347 |
+
getattr(HarmCategory, 'HARM_CATEGORY_CIVIC_INTEGRITY', None): HarmBlockThreshold.BLOCK_NONE
|
348 |
+
}
|
349 |
+
# Filter out None keys in case CIVIC_INTEGRITY doesn't exist
|
350 |
+
safety_settings = {k: v for k, v in safety_settings.items() if k is not None}
|
351 |
+
logger.debug(f"[Gemini Fallback] Using safety settings: {safety_settings}")
|
352 |
+
|
353 |
|
354 |
try:
|
355 |
logger.debug(f"[Gemini Fallback] Initializing model {GEMINI_MODEL}")
|
356 |
model = genai.GenerativeModel(GEMINI_MODEL)
|
357 |
logger.info(f"[Gemini Fallback] Sending request to Gemini ({GEMINI_MODEL})...")
|
358 |
+
request_options = {"timeout": 120}
|
359 |
+
response = await model.generate_content_async( full_prompt, safety_settings=safety_settings, request_options=request_options )
|
360 |
logger.info("[Gemini Fallback] Received response from Gemini.")
|
361 |
|
362 |
+
# Checking block reason is still useful for debugging, though unlikely now
|
363 |
+
if response.prompt_feedback.block_reason: logger.warning(f"[Gemini Fallback] Request blocked unexpectedly. Reason: {response.prompt_feedback.block_reason}"); # Changed to warning
|
364 |
+
# Check candidates for safety blocks as well
|
365 |
+
# Log every candidate that the model stopped for safety reasons (debugging aid only;
# it does not change which text is returned).
for cand in response.candidates:
    # NOTE(review): finish_reason appears to be an SDK enum (FinishReason), not a str, so the
    # former `== 'SAFETY'` comparison was always False and this warning never fired.
    # Compare by member name; fall back to the raw value if the attribute is a plain string.
    if getattr(cand.finish_reason, "name", cand.finish_reason) == 'SAFETY':
        logger.warning(f"[Gemini Fallback] Candidate blocked due to SAFETY. Ratings: {cand.safety_ratings}")
|
368 |
+
|
369 |
+
|
370 |
+
# Attempt to get text even if blocked (might be None)
|
371 |
+
try:
|
372 |
+
summary = response.text
|
373 |
+
except ValueError as e:
|
374 |
+
# Handle cases where accessing response.text raises ValueError (e.g., blocked content)
|
375 |
+
logger.warning(f"[Gemini Fallback] Error accessing response text (likely blocked content): {e}")
|
376 |
+
summary = None # Ensure summary is None
|
377 |
+
|
378 |
if summary: logger.info(f"[Gemini Fallback] Success generating summary. Output len: {len(summary)}"); return summary.strip().replace('_', r'\_').replace('*', r'\*').replace('[', r'\[').replace('`', r'\`')
|
379 |
+
else: logger.warning(f"[Gemini Fallback] Gemini returned empty summary or content was blocked. Finish reason: {response.candidates[0].finish_reason if response.candidates else 'N/A'}"); return "Sorry, the fallback AI model did not provide a summary (possibly due to content filters)."
|
380 |
except Exception as e: logger.error(f"[Gemini Fallback] Unexpected error during Gemini API call: {e}", exc_info=True); return "Sorry, an unexpected error occurred while using the fallback AI service."
|
381 |
|
382 |
+
|
383 |
async def generate_summary(text: str, summary_type: str) -> str:
|
384 |
+
"""Generates summary using OpenRouter (Primary) with Gemini fallback on 10s ReadTimeout."""
|
385 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL, _gemini_fallback_enabled
|
386 |
logger.info(f"[Primary Summary] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
387 |
if not OPENROUTER_API_KEY: logger.error("[Primary Summary] OpenRouter key missing."); return "Error: AI model configuration key missing."
|
|
|
392 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"[Primary Summary] Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
|
393 |
full_prompt = f"{prompt}\n\n{text}"
|
394 |
headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
|
395 |
+
api_timeouts = httpx.Timeout(15.0, read=10.0, write=15.0, pool=60.0) # 10 second read timeout
|
396 |
+
response = None
|
397 |
|
398 |
try:
|
399 |
async with httpx.AsyncClient(timeout=api_timeouts) as client:
|
|
|
421 |
elif response.status_code == 429: logger.warning("[Primary Summary] OpenRouter Rate Limit Exceeded (429)."); return "Sorry, primary AI model is busy. Try again."
|
422 |
elif response.status_code == 500: logger.error(f"[Primary Summary] OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, primary AI service internal error."
|
423 |
else:
|
424 |
+
# Best-effort extraction of the provider's error message for the log line that follows.
# A compound `try` statement cannot follow `;` on the same physical line, and `except`
# cannot share the line with the `try` body, so the former one-liner was a SyntaxError.
error_info = ""
try: error_info = response.json().get("error", {}).get("message", "")
except Exception: pass  # deliberate: a non-JSON or oddly shaped body must not mask the status error
|
|
|
|
|
|
|
|
|
|
|
425 |
logger.error(f"[Primary Summary] Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
|
426 |
return f"Sorry, primary AI service returned unexpected status ({response.status_code})."
|
427 |
|
|
|
477 |
if content:
|
478 |
logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary.")
|
479 |
await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
|
480 |
+
final_summary = await generate_summary(content, summary_type) # This now handles the fallback internally
|
481 |
if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"): user_feedback_message = final_summary; logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
|
482 |
else:
|
483 |
max_length = 4096; summary_parts = [final_summary[i:i+max_length] for i in range(0, len(final_summary), max_length)]
|