Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (Correcting
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -71,7 +71,7 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
|
71 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
72 |
|
73 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
74 |
-
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
|
75 |
|
76 |
if not TELEGRAM_TOKEN: logger.critical("β FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
77 |
if not OPENROUTER_API_KEY: logger.error("β ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
|
@@ -132,7 +132,8 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
132 |
if not video_id: logger.error("[Supadata] No video_id provided"); return None
|
133 |
if not api_key: logger.error("[Supadata] API key missing."); return None
|
134 |
logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
|
135 |
-
|
|
|
136 |
params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
|
137 |
try:
|
138 |
async with httpx.AsyncClient(timeout=30.0) as client:
|
@@ -144,7 +145,7 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
144 |
except json.JSONDecodeError: data = None
|
145 |
content = None
|
146 |
if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
|
147 |
-
if not content and response.text: content = response.text
|
148 |
if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
|
149 |
else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
|
150 |
except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
|
@@ -153,94 +154,73 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
153 |
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
|
154 |
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
|
155 |
except httpx.RequestError as e:
|
|
|
156 |
if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}")
|
157 |
else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
|
158 |
return None
|
159 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
160 |
|
161 |
-
|
162 |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
|
163 |
-
"""Fetches YouTube transcript using Apify REST API (
|
164 |
-
global APIFY_ACTOR_ID
|
165 |
-
if not video_url: logger.error("[Apify
|
166 |
-
if not api_token: logger.error("[Apify
|
167 |
-
logger.info(f"[Apify
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
|
|
|
|
|
|
|
|
176 |
|
177 |
try:
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
if not run_id or not dataset_id: logger.error(f"[Apify Async] Started run but missing runId or datasetId. Data: {run_data}"); return None
|
189 |
-
logger.info(f"[Apify Async] Run started. Run ID: {run_id}, Dataset ID: {dataset_id}")
|
190 |
-
except Exception as e: logger.error(f"[Apify Async] Error parsing start run response: {e}. Response: {response_start.text[:200]}", exc_info=True); return None
|
191 |
-
else:
|
192 |
-
# *** FIX: Correct Indentation for try/except block ***
|
193 |
-
error_info = ""
|
194 |
-
try:
|
195 |
-
# Attempt to get the error message from the JSON response
|
196 |
-
error_info = response_start.json().get("error", {}).get("message", "")
|
197 |
-
except Exception:
|
198 |
-
# If parsing fails or structure is unexpected, just pass
|
199 |
-
pass
|
200 |
-
logger.error(f"[Apify Async] Failed to start run. Status: {response_start.status_code}. Error: '{error_info}' Resp: {response_start.text[:200]}")
|
201 |
-
return None
|
202 |
-
|
203 |
-
run_status_endpoint = f"https://api.apify.com/v2/actor-runs/{run_id}"; elapsed_time = 0; final_status = None
|
204 |
-
while elapsed_time < max_wait_seconds:
|
205 |
-
await asyncio.sleep(poll_interval); elapsed_time += poll_interval
|
206 |
-
logger.debug(f"[Apify Async] Polling status for run {run_id} ({elapsed_time}s elapsed)")
|
207 |
try:
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
else: logger.warning(f"[Apify Async] Dataset {dataset_id} was empty for {video_url}. Response: {results}"); return None
|
239 |
-
|
240 |
-
except httpx.TimeoutException as e: logger.error(f"[Apify Async] Timeout during API interaction for {video_url}: {e}"); return None
|
241 |
-
except httpx.HTTPStatusError as e: logger.error(f"[Apify Async] HTTP Status Error during API interaction for {video_url}: {e}"); return None
|
242 |
-
except httpx.RequestError as e: logger.error(f"[Apify Async] Request error during API interaction for {video_url}: {e}"); return None
|
243 |
-
except Exception as e: logger.error(f"[Apify Async] Unexpected error during Apify Async REST call for {video_url}: {e}", exc_info=True); return None
|
244 |
|
245 |
|
246 |
# (get_youtube_transcript, get_website_content, get_website_content_via_api, generate_summary remain the same)
|
@@ -268,11 +248,11 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
|
|
268 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
269 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
270 |
if transcript_text is None:
|
271 |
-
logger.info("[Fallback YT 2] Trying Apify REST API (
|
272 |
if APIFY_API_TOKEN:
|
273 |
-
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
|
274 |
-
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify
|
275 |
-
else: logger.warning(f"[Fallback YT 2] Apify
|
276 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
277 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
278 |
return transcript_text
|
@@ -325,6 +305,7 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
|
325 |
except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
|
326 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
327 |
|
|
|
328 |
async def generate_summary(text: str, summary_type: str) -> str:
|
329 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
330 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
|
|
1 |
+
# main.py (Correcting Supadata URL and Apify Endpoint/Logic)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
71 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
72 |
|
73 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
74 |
+
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # Keep karamelo as per docs
|
75 |
|
76 |
if not TELEGRAM_TOKEN: logger.critical("β FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
77 |
if not OPENROUTER_API_KEY: logger.error("β ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
|
|
|
132 |
if not video_id: logger.error("[Supadata] No video_id provided"); return None
|
133 |
if not api_key: logger.error("[Supadata] API key missing."); return None
|
134 |
logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
|
135 |
+
# *** FIX: Use correct base URL ***
|
136 |
+
api_endpoint = "https://api.supadata.ai/v1/youtube/transcript"
|
137 |
params = {"videoId": video_id, "format": "text"}; headers = {"X-API-Key": api_key}
|
138 |
try:
|
139 |
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
|
145 |
except json.JSONDecodeError: data = None
|
146 |
content = None
|
147 |
if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
|
148 |
+
if not content and response.text: content = response.text # Check plain text response
|
149 |
if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
|
150 |
else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
|
151 |
except Exception as e: logger.error(f"[Supadata] Error processing success response for {video_id}: {e}", exc_info=True); return None
|
|
|
154 |
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
|
155 |
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
|
156 |
except httpx.RequestError as e:
|
157 |
+
# SSL errors usually fall under RequestError
|
158 |
if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}")
|
159 |
else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
|
160 |
return None
|
161 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
162 |
|
|
|
163 |
def _extract_apify_transcript(item: dict) -> Optional[str]:
    """Return the transcript text carried by one Apify dataset item, if any.

    Keys are tried in the same order as before: ``captions`` (string),
    ``text``, ``transcript`` and finally ``captions`` as a list of either
    ``{"text": ...}`` dicts or plain strings. Returns ``None`` when no
    supported shape is found.
    """
    for key in ("captions", "text", "transcript"):
        value = item.get(key)
        if isinstance(value, str):
            return value

    captions = item.get("captions")
    if not isinstance(captions, list):
        return None

    logger.warning("[Apify SyncItems] Received list format for 'captions' unexpectedly. Processing...")
    if not captions:
        return None
    first = captions[0]
    if isinstance(first, dict) and "text" in first:
        # Skip malformed entries instead of letting one bad line abort the whole transcript.
        return " ".join(
            line["text"]
            for line in captions
            if isinstance(line, dict) and isinstance(line.get("text"), str) and line["text"]
        )
    if isinstance(first, str):
        return " ".join(part for part in captions if isinstance(part, str))
    return None


async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
    """Fetch a YouTube transcript via Apify's run-sync-get-dataset-items endpoint.

    Args:
        video_url: URL of the video; sent as the single entry of the actor's
            ``urls`` input.
        api_token: Apify API token, passed as the ``token`` query parameter.

    Returns:
        The stripped transcript text, or ``None`` when an argument is missing,
        the response status is not 200, the dataset is empty or unparseable,
        the transcript is blank, or the HTTP call fails. Failures are logged
        rather than raised.
    """
    if not video_url:
        logger.error("[Apify SyncItems] No video_url provided")
        return None
    if not api_token:
        logger.error("[Apify SyncItems] API token missing.")
        return None
    logger.info(f"[Apify SyncItems] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")

    sync_items_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
    params = {"token": api_token}  # this endpoint accepts the token as a query parameter
    payload = {
        "urls": [video_url],
        "outputFormat": "singleStringText",
        "maxRetries": 5,
        "channelHandleBoolean": False,
        "channelNameBoolean": False,
        "datePublishedBoolean": False,
        "relativeDateTextBoolean": False,
    }
    headers = {"Content-Type": "application/json"}  # no auth header: token travels in params

    try:
        # The endpoint blocks until the actor run finishes, hence the long timeout.
        async with httpx.AsyncClient(timeout=120.0) as client:
            # Never write the real token to the logs; show a redacted copy instead.
            safe_params = {**params, "token": "***"}
            logger.debug(f"[Apify SyncItems] POST Request Details:\nURL: {sync_items_endpoint}\nParams: {safe_params}\nHeaders: {headers}\nPayload: {json.dumps(payload)}")
            response = await client.post(sync_items_endpoint, headers=headers, params=params, json=payload)
            logger.debug(f"[Apify SyncItems] Received status code {response.status_code} for {video_url}")

            status = response.status_code
            if status == 200:
                try:
                    # On success the response body *is* the array of dataset items.
                    results = response.json()
                    if not (isinstance(results, list) and results):
                        logger.warning(f"[Apify SyncItems] Actor success but dataset was empty for {video_url}. Response: {results}")
                        return None

                    item = results[0]
                    if not isinstance(item, dict):
                        logger.warning(f"[Apify SyncItems] Unexpected dataset item type {type(item).__name__} for {video_url}.")
                        return None

                    content = _extract_apify_transcript(item)
                    # Strip first so a whitespace-only transcript is reported as "no content".
                    transcript = content.strip() if isinstance(content, str) else ""
                    if transcript:
                        logger.info(f"[Apify SyncItems] Success via REST for {video_url}. Length: {len(content)}")
                        return transcript
                    logger.warning(f"[Apify SyncItems] Dataset item parsed but transcript content empty/invalid format for {video_url}. Item keys: {list(item.keys())}")
                    return None
                except json.JSONDecodeError:
                    logger.error(f"[Apify SyncItems] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:200]}")
                    return None
                except Exception as e:
                    logger.error(f"[Apify SyncItems] Error processing success response for {video_url}: {e}", exc_info=True)
                    return None

            if status == 400:
                logger.error(f"[Apify SyncItems] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}")
                return None
            if status == 401:
                logger.error("[Apify SyncItems] Auth error (401). Check token.")
                return None
            if status == 404:
                # Best-effort extraction of the API's error message; the raw body is logged regardless.
                error_info = ""
                try:
                    error_info = response.json().get("error", {}).get("message", "")
                except (ValueError, AttributeError):
                    # Body was not JSON, or not shaped like {"error": {"message": ...}}.
                    pass
                logger.error(f"[Apify SyncItems] Endpoint/Actor Not Found (404). Error: '{error_info}' Resp:{response.text[:200]}")
                return None

            logger.error(f"[Apify SyncItems] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}")
            return None

    except httpx.TimeoutException as e:
        logger.error(f"[Apify SyncItems] Timeout during API interaction for {video_url}: {e}")
        return None
    except httpx.HTTPStatusError as e:
        logger.error(f"[Apify SyncItems] HTTP Status Error during API interaction for {video_url}: {e}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True)
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
|
225 |
|
226 |
# (get_youtube_transcript, get_website_content, get_website_content_via_api, generate_summary remain the same)
|
|
|
248 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
249 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
250 |
if transcript_text is None:
|
251 |
+
logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...") # Updated log
|
252 |
if APIFY_API_TOKEN:
|
253 |
+
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN) # Calls updated func
|
254 |
+
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
|
255 |
+
else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
|
256 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
257 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
258 |
return transcript_text
|
|
|
305 |
except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
|
306 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
307 |
|
308 |
+
|
309 |
async def generate_summary(text: str, summary_type: str) -> str:
|
310 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
311 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|