Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -239,19 +239,43 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
239 |
return None
|
240 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
241 |
|
|
|
242 |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
|
243 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
global APIFY_ACTOR_ID
|
245 |
-
|
246 |
-
#
|
247 |
-
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
return await _run_apify_actor_for_web_content(
|
250 |
-
url=video_url,
|
251 |
api_token=api_token,
|
252 |
actor_id=APIFY_ACTOR_ID,
|
253 |
-
|
254 |
-
actor_name="Apify YT Default (Fallback 1)"
|
255 |
)
|
256 |
|
257 |
async def get_transcript_via_apify_structured_extractor(video_url: str, api_token: str) -> Optional[str]:
|
@@ -683,18 +707,6 @@ async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: s
|
|
683 |
except httpx.RequestError as e: logger.error(f"{log_prefix} Request error during API interaction for {url}: {e}"); return None
|
684 |
except Exception as e: logger.error(f"{log_prefix} Unexpected error during {actor_name} call for {url}: {e}", exc_info=True); return None
|
685 |
|
686 |
-
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
|
687 |
-
"""Fallback YT 2: Fetches YouTube transcript using default Apify Actor via generic function."""
|
688 |
-
global APIFY_ACTOR_ID
|
689 |
-
# The specific run_input logic is now handled within _run_apify_actor_for_web_content
|
690 |
-
# when it detects the actor_id matches APIFY_ACTOR_ID
|
691 |
-
logger.debug(f"[get_transcript_via_apify] Calling generic runner for URL: {video_url}")
|
692 |
-
return await _run_apify_actor_for_web_content(
|
693 |
-
url=video_url, # Pass video_url as the 'url' parameter
|
694 |
-
api_token=api_token,
|
695 |
-
actor_id=APIFY_ACTOR_ID,
|
696 |
-
actor_name="Apify YT" # Keep specific name for logging clarity
|
697 |
-
)
|
698 |
|
699 |
async def get_website_content_via_apify_crawler(url: str, api_token: str) -> Optional[str]:
|
700 |
"""Fallback 4: Fetches website content using Apify Website Content Crawler."""
|
|
|
239 |
return None
|
240 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
241 |
|
242 |
+
# --- YouTube fallback 1: Apify default transcript actor (unique definition – delete any duplicates) ---
|
243 |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
|
244 |
+
"""
|
245 |
+
Fetch the YouTube transcript with Apify’s default “YouTube Transcript” actor
|
246 |
+
(ID `1s7eXiaukVuOr4Ueg`).
|
247 |
+
The helper `_run_apify_actor_for_web_content` supplies the correct `run_input`
|
248 |
+
(including the residential proxy group), handles retries, and parses the returned
|
249 |
+
dataset into plain text.
|
250 |
+
|
251 |
+
Parameters
|
252 |
+
----------
|
253 |
+
video_url : str
|
254 |
+
Full YouTube URL supplied by the user.
|
255 |
+
api_token : str
|
256 |
+
Apify API token that has residential proxy credit.
|
257 |
+
|
258 |
+
Returns
|
259 |
+
-------
|
260 |
+
Optional[str]
|
261 |
+
Combined transcript text or `None` if all attempts fail.
|
262 |
+
"""
|
263 |
global APIFY_ACTOR_ID
|
264 |
+
|
265 |
+
# Validate arguments
|
266 |
+
if not video_url:
|
267 |
+
logger.error("[Apify YT] No video_url provided")
|
268 |
+
return None
|
269 |
+
if not api_token:
|
270 |
+
logger.error("[Apify YT] API token missing.")
|
271 |
+
return None
|
272 |
+
|
273 |
+
logger.info(f"[Apify YT] Attempting transcript fetch via actor {APIFY_ACTOR_ID}")
|
274 |
return await _run_apify_actor_for_web_content(
|
275 |
+
url=video_url,
|
276 |
api_token=api_token,
|
277 |
actor_id=APIFY_ACTOR_ID,
|
278 |
+
actor_name="Apify YT Default (Fallback 1)"
|
|
|
279 |
)
|
280 |
|
281 |
async def get_transcript_via_apify_structured_extractor(video_url: str, api_token: str) -> Optional[str]:
|
|
|
707 |
except httpx.RequestError as e: logger.error(f"{log_prefix} Request error during API interaction for {url}: {e}"); return None
|
708 |
except Exception as e: logger.error(f"{log_prefix} Unexpected error during {actor_name} call for {url}: {e}", exc_info=True); return None
|
709 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
710 |
|
711 |
async def get_website_content_via_apify_crawler(url: str, api_token: str) -> Optional[str]:
|
712 |
"""Fallback 4: Fetches website content using Apify Website Content Crawler."""
|