Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (Correcting
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -71,7 +71,7 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
|
71 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
72 |
|
73 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
74 |
-
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") #
|
75 |
|
76 |
if not TELEGRAM_TOKEN: logger.critical("β FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
77 |
if not OPENROUTER_API_KEY: logger.error("β ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
|
@@ -132,8 +132,7 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
132 |
if not video_id: logger.error("[Supadata] No video_id provided"); return None
|
133 |
if not api_key: logger.error("[Supadata] API key missing."); return None
|
134 |
logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
|
135 |
-
|
136 |
-
api_endpoint = "https://api.supadata.ai/v1/youtube/transcript"
|
137 |
params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
|
138 |
try:
|
139 |
async with httpx.AsyncClient(timeout=30.0) as client:
|
@@ -145,7 +144,7 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
145 |
except json.JSONDecodeError: data = None
|
146 |
content = None
|
147 |
if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
|
148 |
-
if not content and response.text: content = response.text
|
149 |
if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
|
150 |
else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
|
151 |
except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
|
@@ -154,8 +153,7 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
154 |
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
|
155 |
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
|
156 |
except httpx.RequestError as e:
|
157 |
-
|
158 |
-
if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}")
|
159 |
else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
|
160 |
return None
|
161 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
@@ -167,44 +165,31 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
|
|
167 |
if not api_token: logger.error("[Apify SyncItems] API token missing."); return None
|
168 |
logger.info(f"[Apify SyncItems] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
|
169 |
|
170 |
-
# *** FIX: Use the run-sync-get-dataset-items endpoint ***
|
171 |
sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
|
172 |
params = {"token": api_token} # Token in param as per OpenAPI spec for this endpoint
|
173 |
-
payload = {
|
174 |
-
"urls": [video_url],
|
175 |
-
"outputFormat": "singleStringText",
|
176 |
-
"maxRetries": 5,
|
177 |
-
"channelHandleBoolean": False, "channelNameBoolean": False,
|
178 |
-
"datePublishedBoolean": False, "relativeDateTextBoolean": False,
|
179 |
-
}
|
180 |
headers = {"Content-Type": "application/json"} # No Auth header needed if token in params
|
181 |
|
182 |
try:
|
183 |
-
# Use a longer timeout for this synchronous endpoint
|
184 |
async with httpx.AsyncClient(timeout=120.0) as client:
|
185 |
-
log_headers = {k: v for k, v in headers.items()}
|
186 |
logger.debug(f"[Apify SyncItems] POST Request Details:\nURL: {sync_items_endpoint}\nParams: {params}\nHeaders: {log_headers}\nPayload: {json.dumps(payload)}")
|
187 |
-
# *** FIX: POST to the sync items endpoint ***
|
188 |
response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
|
189 |
logger.debug(f"[Apify SyncItems] Received status code {response.status_code} for {video_url}")
|
190 |
|
191 |
-
# *** FIX: Expect 200 OK for this endpoint ***
|
192 |
if response.status_code == 200:
|
193 |
try:
|
194 |
-
# Response body *is* the dataset items array
|
195 |
results = response.json()
|
196 |
if isinstance(results, list) and len(results) > 0:
|
197 |
item = results[0]
|
198 |
-
# Parsing logic (same as before)
|
199 |
content = None
|
200 |
-
if "captions" in item and isinstance(item["captions"], str): content = item["captions"]
|
201 |
-
elif "text" in item and isinstance(item["text"], str): content = item["text"]
|
202 |
-
elif "transcript" in item and isinstance(item["transcript"], str): content = item["transcript"]
|
203 |
elif "captions" in item and isinstance(item["captions"], list):
|
204 |
logger.warning("[Apify SyncItems] Received list format for 'captions' unexpectedly. Processing...")
|
205 |
if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text"))
|
206 |
elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"])
|
207 |
-
|
208 |
if content and isinstance(content, str): logger.info(f"[Apify SyncItems] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
|
209 |
else: logger.warning(f"[Apify SyncItems] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None
|
210 |
else: logger.warning(f"[Apify SyncItems] Actor success but dataset was empty for {video_url}. Response: {results}"); return None
|
@@ -212,7 +197,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
|
|
212 |
except Exception as e: logger.error(f"[Apify SyncItems] Error processing success response for {video_url}: {e}", exc_info=True); return None
|
213 |
elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
|
214 |
elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
|
215 |
-
elif response.status_code == 404:
|
216 |
error_info = ""; try: error_info = response.json().get("error", {}).get("message", "") except Exception: pass
|
217 |
logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}"); return None
|
218 |
else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
|
@@ -223,7 +208,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
|
|
223 |
except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
|
224 |
|
225 |
|
226 |
-
# (get_youtube_transcript, get_website_content, get_website_content_via_api
|
227 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
228 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
229 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
@@ -248,9 +233,9 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
|
|
248 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
249 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
250 |
if transcript_text is None:
|
251 |
-
logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...")
|
252 |
if APIFY_API_TOKEN:
|
253 |
-
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
|
254 |
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
|
255 |
else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
|
256 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
@@ -305,7 +290,7 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
|
305 |
except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
|
306 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
307 |
|
308 |
-
|
309 |
async def generate_summary(text: str, summary_type: str) -> str:
|
310 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
311 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
@@ -340,7 +325,15 @@ async def generate_summary(text: str, summary_type: str) -> str:
|
|
340 |
elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
|
341 |
elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
|
342 |
elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
|
343 |
-
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
344 |
except httpx.ReadTimeout: logger.error(f"Read Timeout error ({api_timeouts.read}s) waiting for OpenRouter API response."); return f"Sorry, the request to the AI model timed out after {api_timeouts.read} seconds while waiting for a response. The content might be too long or the service busy. Please try again later or with shorter content."
|
345 |
except httpx.TimeoutException as e: logger.error(f"Timeout error ({type(e)}) connecting to/writing to OpenRouter API: {e}"); return "Sorry, the request to the AI model timed out. Please try again."
|
346 |
except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the AI model service."
|
@@ -349,7 +342,7 @@ async def generate_summary(text: str, summary_type: str) -> str:
|
|
349 |
if response: logger.error(f"--> Last response status before error: {response.status_code}")
|
350 |
return "Sorry, an unexpected error occurred while trying to generate the summary."
|
351 |
|
352 |
-
|
353 |
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
|
354 |
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
355 |
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
|
|
|
1 |
+
# main.py (Correcting SyntaxError at line 216)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
71 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
72 |
|
73 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
74 |
+
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Using ~ as per last attempt
|
75 |
|
76 |
if not TELEGRAM_TOKEN: logger.critical("β FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
77 |
if not OPENROUTER_API_KEY: logger.error("β ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
|
|
|
132 |
if not video_id: logger.error("[Supadata] No video_id provided"); return None
|
133 |
if not api_key: logger.error("[Supadata] API key missing."); return None
|
134 |
logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
|
135 |
+
api_endpoint = "https://api.supadata.ai/v1/youtube/transcript" # Corrected URL
|
|
|
136 |
params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
|
137 |
try:
|
138 |
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
|
144 |
except json.JSONDecodeError: data = None
|
145 |
content = None
|
146 |
if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
|
147 |
+
if not content and response.text: content = response.text
|
148 |
if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
|
149 |
else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
|
150 |
except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
|
|
|
153 |
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
|
154 |
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
|
155 |
except httpx.RequestError as e:
|
156 |
+
if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}") # Should be fixed now
|
|
|
157 |
else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
|
158 |
return None
|
159 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
|
|
165 |
if not api_token: logger.error("[Apify SyncItems] API token missing."); return None
|
166 |
logger.info(f"[Apify SyncItems] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
|
167 |
|
|
|
168 |
sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
|
169 |
params = {"token": api_token} # Token in param as per OpenAPI spec for this endpoint
|
170 |
+
payload = { "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5, "channelHandleBoolean": False, "channelNameBoolean": False, "datePublishedBoolean": False, "relativeDateTextBoolean": False, }
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
headers = {"Content-Type": "application/json"} # No Auth header needed if token in params
|
172 |
|
173 |
try:
|
|
|
174 |
async with httpx.AsyncClient(timeout=120.0) as client:
|
175 |
+
log_headers = {k: v for k, v in headers.items()}
|
176 |
# Never write the Apify API token to the logs: `params` carries it as the
# "token" query parameter, so mask that value before formatting the details.
log_params = {k: ("***" if k == "token" else v) for k, v in params.items()}
logger.debug(f"[Apify SyncItems] POST Request Details:\nURL: {sync_items_endpoint}\nParams: {log_params}\nHeaders: {log_headers}\nPayload: {json.dumps(payload)}")
|
|
|
177 |
response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
|
178 |
logger.debug(f"[Apify SyncItems] Received status code {response.status_code} for {video_url}")
|
179 |
|
|
|
180 |
if response.status_code == 200:
|
181 |
try:
|
|
|
182 |
results = response.json()
|
183 |
if isinstance(results, list) and len(results) > 0:
|
184 |
item = results[0]
|
|
|
185 |
content = None
|
186 |
+
if "captions" in item and isinstance(item["captions"], str): logger.info("[Apify SyncItems] Found 'captions' key with string content."); content = item["captions"]
|
187 |
+
elif "text" in item and isinstance(item["text"], str): logger.info("[Apify SyncItems] Found 'text' key with string content."); content = item["text"]
|
188 |
+
elif "transcript" in item and isinstance(item["transcript"], str): logger.info("[Apify SyncItems] Found 'transcript' key with string content."); content = item["transcript"]
|
189 |
elif "captions" in item and isinstance(item["captions"], list):
|
190 |
logger.warning("[Apify SyncItems] Received list format for 'captions' unexpectedly. Processing...")
|
191 |
if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text"))
|
192 |
elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"])
|
|
|
193 |
if content and isinstance(content, str): logger.info(f"[Apify SyncItems] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
|
194 |
else: logger.warning(f"[Apify SyncItems] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None
|
195 |
else: logger.warning(f"[Apify SyncItems] Actor success but dataset was empty for {video_url}. Response: {results}"); return None
|
|
|
197 |
except Exception as e: logger.error(f"[Apify SyncItems] Error processing success response for {video_url}: {e}", exc_info=True); return None
|
198 |
elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
|
199 |
elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
|
200 |
+
elif response.status_code == 404:
|
201 |
# Best-effort extraction of Apify's error message for the 404 log line.
# A `try` statement is a compound statement and cannot follow `;` on the same
# physical line, so it is written out in full (same shape as the equivalent
# handling in generate_summary).
error_info = ""
try:
    error_info = response.json().get("error", {}).get("message", "")
except Exception:
    # The body may not be JSON or may not have the expected shape; the raw
    # response text is logged right after this, so an empty message is fine.
    pass
|
202 |
logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}"); return None
|
203 |
else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
|
|
|
208 |
except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
|
209 |
|
210 |
|
211 |
+
# (get_youtube_transcript, get_website_content, get_website_content_via_api remain the same)
|
212 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
213 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
214 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
|
|
233 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
234 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
235 |
if transcript_text is None:
|
236 |
+
logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...")
|
237 |
if APIFY_API_TOKEN:
|
238 |
+
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
|
239 |
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
|
240 |
else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
|
241 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
|
|
290 |
except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
|
291 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
292 |
|
293 |
+
# --- Summarization Function ---
|
294 |
async def generate_summary(text: str, summary_type: str) -> str:
|
295 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
296 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
|
|
325 |
elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
|
326 |
elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
|
327 |
elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
|
328 |
+
else:
|
329 |
+
# *** FIX: Corrected Indentation for error info extraction ***
|
330 |
+
error_info = ""
|
331 |
+
try:
|
332 |
+
error_info = response.json().get("error", {}).get("message", "")
|
333 |
+
except Exception:
|
334 |
+
pass
|
335 |
+
logger.error(f"Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
|
336 |
+
return f"Sorry, AI service returned unexpected status ({response.status_code})."
|
337 |
except httpx.ReadTimeout: logger.error(f"Read Timeout error ({api_timeouts.read}s) waiting for OpenRouter API response."); return f"Sorry, the request to the AI model timed out after {api_timeouts.read} seconds while waiting for a response. The content might be too long or the service busy. Please try again later or with shorter content."
|
338 |
except httpx.TimeoutException as e: logger.error(f"Timeout error ({type(e)}) connecting to/writing to OpenRouter API: {e}"); return "Sorry, the request to the AI model timed out. Please try again."
|
339 |
except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the AI model service."
|
|
|
342 |
if response: logger.error(f"--> Last response status before error: {response.status_code}")
|
343 |
return "Sorry, an unexpected error occurred while trying to generate the summary."
|
344 |
|
345 |
+
# (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
|
346 |
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
|
347 |
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
348 |
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
|