fmab777 committed on
Commit
f996499
·
verified ·
1 Parent(s): 332d5fc

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +25 -32
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Correcting Supadata URL and Apify Endpoint/Logic)
2
  import os
3
  import re
4
  import logging
@@ -71,7 +71,7 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
71
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
72
 
73
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
74
- APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Keep karamelo as per docs
75
 
76
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
77
  if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
@@ -132,8 +132,7 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
132
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
133
  if not api_key: logger.error("[Supadata] API key missing."); return None
134
  logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
135
- # *** FIX: Use correct base URL ***
136
- api_endpoint = "https://api.supadata.ai/v1/youtube/transcript"
137
  params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
138
  try:
139
  async with httpx.AsyncClient(timeout=30.0) as client:
@@ -145,7 +144,7 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
145
  except json.JSONDecodeError: data = None
146
  content = None
147
  if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
148
- if not content and response.text: content = response.text # Check plain text response
149
  if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
150
  else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
151
  except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
@@ -154,8 +153,7 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
154
  else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
155
  except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
156
  except httpx.RequestError as e:
157
- # SSL errors usually fall under RequestError
158
- if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}")
159
  else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
160
  return None
161
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
@@ -167,44 +165,31 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
167
  if not api_token: logger.error("[Apify SyncItems] API token missing."); return None
168
  logger.info(f"[Apify SyncItems] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
169
 
170
- # *** FIX: Use the run-sync-get-dataset-items endpoint ***
171
  sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
172
  params = {"token": api_token} # Token in param as per OpenAPI spec for this endpoint
173
- payload = {
174
- "urls": [video_url],
175
- "outputFormat": "singleStringText",
176
- "maxRetries": 5,
177
- "channelHandleBoolean": False, "channelNameBoolean": False,
178
- "datePublishedBoolean": False, "relativeDateTextBoolean": False,
179
- }
180
  headers = {"Content-Type": "application/json"} # No Auth header needed if token in params
181
 
182
  try:
183
- # Use a longer timeout for this synchronous endpoint
184
  async with httpx.AsyncClient(timeout=120.0) as client:
185
- log_headers = {k: v for k, v in headers.items()} # Log headers
186
  logger.debug(f"[Apify SyncItems] POST Request Details:\nURL: {sync_items_endpoint}\nParams: {params}\nHeaders: {log_headers}\nPayload: {json.dumps(payload)}")
187
- # *** FIX: POST to the sync items endpoint ***
188
  response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
189
  logger.debug(f"[Apify SyncItems] Received status code {response.status_code} for {video_url}")
190
 
191
- # *** FIX: Expect 200 OK for this endpoint ***
192
  if response.status_code == 200:
193
  try:
194
- # Response body *is* the dataset items array
195
  results = response.json()
196
  if isinstance(results, list) and len(results) > 0:
197
  item = results[0]
198
- # Parsing logic (same as before)
199
  content = None
200
- if "captions" in item and isinstance(item["captions"], str): content = item["captions"]
201
- elif "text" in item and isinstance(item["text"], str): content = item["text"]
202
- elif "transcript" in item and isinstance(item["transcript"], str): content = item["transcript"]
203
  elif "captions" in item and isinstance(item["captions"], list):
204
  logger.warning("[Apify SyncItems] Received list format for 'captions' unexpectedly. Processing...")
205
  if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text"))
206
  elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"])
207
-
208
  if content and isinstance(content, str): logger.info(f"[Apify SyncItems] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
209
  else: logger.warning(f"[Apify SyncItems] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None
210
  else: logger.warning(f"[Apify SyncItems] Actor success but dataset was empty for {video_url}. Response: {results}"); return None
@@ -212,7 +197,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
212
  except Exception as e: logger.error(f"[Apify SyncItems] Error processing success response for {video_url}: {e}", exc_info=True); return None
213
  elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
214
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
215
- elif response.status_code == 404: # This was the error before, log details if it happens again
216
  error_info = ""; try: error_info = response.json().get("error", {}).get("message", "") except Exception: pass
217
  logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}"); return None
218
  else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
@@ -223,7 +208,7 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
223
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
224
 
225
 
226
- # (get_youtube_transcript, get_website_content, get_website_content_via_api, generate_summary remain the same)
227
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
228
  global SUPADATA_API_KEY, APIFY_API_TOKEN
229
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
@@ -248,9 +233,9 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
248
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
249
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
250
  if transcript_text is None:
251
- logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...") # Updated log
252
  if APIFY_API_TOKEN:
253
- transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN) # Calls updated func
254
  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
255
  else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
256
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
@@ -305,7 +290,7 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
305
  except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
306
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
307
 
308
-
309
  async def generate_summary(text: str, summary_type: str) -> str:
310
  global OPENROUTER_API_KEY, OPENROUTER_MODEL
311
  logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
@@ -340,7 +325,15 @@ async def generate_summary(text: str, summary_type: str) -> str:
340
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
341
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
342
  elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
343
- else: logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp:{response.text[:500]}"); return f"Sorry, AI service returned unexpected status ({response.status_code})."
 
 
 
 
 
 
 
 
344
  except httpx.ReadTimeout: logger.error(f"Read Timeout error ({api_timeouts.read}s) waiting for OpenRouter API response."); return f"Sorry, the request to the AI model timed out after {api_timeouts.read} seconds while waiting for a response. The content might be too long or the service busy. Please try again later or with shorter content."
345
  except httpx.TimeoutException as e: logger.error(f"Timeout error ({type(e)}) connecting to/writing to OpenRouter API: {e}"); return "Sorry, the request to the AI model timed out. Please try again."
346
  except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the AI model service."
@@ -349,7 +342,7 @@ async def generate_summary(text: str, summary_type: str) -> str:
349
  if response: logger.error(f"--> Last response status before error: {response.status_code}")
350
  return "Sorry, an unexpected error occurred while trying to generate the summary."
351
 
352
-
353
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
354
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
355
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
 
1
+ # main.py (Correcting SyntaxError at line 216)
2
  import os
3
  import re
4
  import logging
 
71
  WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
72
 
73
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
74
+ APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Using ~ as per last attempt
75
 
76
  if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
77
  if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
 
132
  if not video_id: logger.error("[Supadata] No video_id provided"); return None
133
  if not api_key: logger.error("[Supadata] API key missing."); return None
134
  logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
135
+ api_endpoint = "https://api.supadata.ai/v1/youtube/transcript" # Corrected URL
 
136
  params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
137
  try:
138
  async with httpx.AsyncClient(timeout=30.0) as client:
 
144
  except json.JSONDecodeError: data = None
145
  content = None
146
  if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
147
+ if not content and response.text: content = response.text
148
  if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
149
  else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
150
  except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
 
153
  else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
154
  except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
155
  except httpx.RequestError as e:
156
+ if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}") # Should be fixed now
 
157
  else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
158
  return None
159
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
 
165
  if not api_token: logger.error("[Apify SyncItems] API token missing."); return None
166
  logger.info(f"[Apify SyncItems] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
167
 
 
168
  sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
169
  params = {"token": api_token} # Token in param as per OpenAPI spec for this endpoint
170
+ payload = { "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5, "channelHandleBoolean": False, "channelNameBoolean": False, "datePublishedBoolean": False, "relativeDateTextBoolean": False, }
 
 
 
 
 
 
171
  headers = {"Content-Type": "application/json"} # No Auth header needed if token in params
172
 
173
  try:
 
174
  async with httpx.AsyncClient(timeout=120.0) as client:
175
+ log_headers = {k: v for k, v in headers.items()}
176
  logger.debug(f"[Apify SyncItems] POST Request Details:\nURL: {sync_items_endpoint}\nParams: {params}\nHeaders: {log_headers}\nPayload: {json.dumps(payload)}")
 
177
  response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
178
  logger.debug(f"[Apify SyncItems] Received status code {response.status_code} for {video_url}")
179
 
 
180
  if response.status_code == 200:
181
  try:
 
182
  results = response.json()
183
  if isinstance(results, list) and len(results) > 0:
184
  item = results[0]
 
185
  content = None
186
+ if "captions" in item and isinstance(item["captions"], str): logger.info("[Apify SyncItems] Found 'captions' key with string content."); content = item["captions"]
187
+ elif "text" in item and isinstance(item["text"], str): logger.info("[Apify SyncItems] Found 'text' key with string content."); content = item["text"]
188
+ elif "transcript" in item and isinstance(item["transcript"], str): logger.info("[Apify SyncItems] Found 'transcript' key with string content."); content = item["transcript"]
189
  elif "captions" in item and isinstance(item["captions"], list):
190
  logger.warning("[Apify SyncItems] Received list format for 'captions' unexpectedly. Processing...")
191
  if len(item["captions"]) > 0 and isinstance(item["captions"][0], dict) and 'text' in item["captions"][0]: content = " ".join(line.get("text", "") for line in item["captions"] if line.get("text"))
192
  elif len(item["captions"]) > 0 and isinstance(item["captions"][0], str): content = " ".join(item["captions"])
 
193
  if content and isinstance(content, str): logger.info(f"[Apify SyncItems] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
194
  else: logger.warning(f"[Apify SyncItems] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}"); return None
195
  else: logger.warning(f"[Apify SyncItems] Actor success but dataset was empty for {video_url}. Response: {results}"); return None
 
197
  except Exception as e: logger.error(f"[Apify SyncItems] Error processing success response for {video_url}: {e}", exc_info=True); return None
198
  elif response.status_code == 400: logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
199
  elif response.status_code == 401: logger.error("[Apify SyncItems] Auth error (401). Check token."); return None
200
+ elif response.status_code == 404:
201
  error_info = ""; try: error_info = response.json().get("error", {}).get("message", "") except Exception: pass
202
  logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}"); return None
203
  else: logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
 
208
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
209
 
210
 
211
+ # (get_youtube_transcript, get_website_content, get_website_content_via_api remain the same)
212
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
213
  global SUPADATA_API_KEY, APIFY_API_TOKEN
214
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
 
233
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
234
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
235
  if transcript_text is None:
236
+ logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...")
237
  if APIFY_API_TOKEN:
238
+ transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
239
  if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
240
  else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
241
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
 
290
  except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
291
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
292
 
293
+ # --- Summarization Function ---
294
  async def generate_summary(text: str, summary_type: str) -> str:
295
  global OPENROUTER_API_KEY, OPENROUTER_MODEL
296
  logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
 
325
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
326
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
327
  elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
328
+ else:
329
+ # *** FIX: Corrected Indentation for error info extraction ***
330
+ error_info = ""
331
+ try:
332
+ error_info = response.json().get("error", {}).get("message", "")
333
+ except Exception:
334
+ pass
335
+ logger.error(f"Unexpected status {response.status_code} from OpenRouter. Error: '{error_info}' Resp:{response.text[:500]}");
336
+ return f"Sorry, AI service returned unexpected status ({response.status_code})."
337
  except httpx.ReadTimeout: logger.error(f"Read Timeout error ({api_timeouts.read}s) waiting for OpenRouter API response."); return f"Sorry, the request to the AI model timed out after {api_timeouts.read} seconds while waiting for a response. The content might be too long or the service busy. Please try again later or with shorter content."
338
  except httpx.TimeoutException as e: logger.error(f"Timeout error ({type(e)}) connecting to/writing to OpenRouter API: {e}"); return "Sorry, the request to the AI model timed out. Please try again."
339
  except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the AI model service."
 
342
  if response: logger.error(f"--> Last response status before error: {response.status_code}")
343
  return "Sorry, an unexpected error occurred while trying to generate the summary."
344
 
345
+ # (process_summary_task, handlers, setup, lifespan, routes, etc. remain the same)
346
  async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
347
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
348
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None