fmab777 committed on
Commit
fe05003
·
verified ·
1 Parent(s): 2398f65

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +31 -7
main.py CHANGED
@@ -446,9 +446,17 @@ async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: s
446
  logger.debug(f"[{actor_name}] POST Request to {sync_items_endpoint} for {url}")
447
  response = await client.post(sync_items_endpoint, headers=headers, params=params, json=run_input)
448
  logger.debug(f"[{actor_name}] Received status code {response.status_code} for {url}")
449
- if response.status_code == 200:
 
 
 
 
 
 
 
450
  try:
451
  results = response.json(); content = None
 
452
  if isinstance(results, list) and len(results) > 0:
453
  item = results[0]
454
  if "text" in item and isinstance(item["text"], str): content = item["text"]
@@ -458,17 +466,33 @@ async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: s
458
  logger.warning(f"[{actor_name}] No 'text' or 'markdown' found, attempting to parse 'html' from result.")
459
  soup = BeautifulSoup(item["html"], DEFAULT_PARSER)
460
  content = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip())
461
- if content and isinstance(content, str) and len(content) > 30: logger.info(f"[{actor_name}] Success via REST for {url}. Length: {len(content)}"); return content.strip()
 
 
 
 
462
  else:
463
  content_len = len(content) if content and isinstance(content, str) else 0
464
- logger.warning(f"[{actor_name}] Dataset item parsed but text content empty/short/invalid format for {url}. Item keys: {list(item.keys())}. Length: {content_len}"); return None
465
- else: logger.warning(f"[{actor_name}] Actor success but dataset was empty for {url}. Response: {results}"); return None
466
- except json.JSONDecodeError: logger.error(f"[{actor_name}] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:200]}"); return None
467
- except Exception as e: logger.error(f"[{actor_name}] Error processing success response for {url}: {e}", exc_info=True); return None
 
 
 
 
 
 
 
 
 
 
468
  elif response.status_code == 400: logger.error(f"[{actor_name}] Bad Request (400) for {url}. Check run_input. Resp:{response.text[:200]}"); return None
469
  elif response.status_code == 401: logger.error(f"[{actor_name}] Auth error (401). Check token."); return None
470
  elif response.status_code == 404: logger.error(f"[{actor_name}] Endpoint/Actor Not Found (404). Actor: {actor_id} Resp:{response.text[:200]}"); return None
471
- else: logger.error(f"[{actor_name}] Unexpected status {response.status_code} for {url}. Resp:{response.text[:200]}"); return None
 
 
472
  except httpx.TimeoutException as e: logger.error(f"[{actor_name}] Timeout during API interaction for {url}: {e}"); return None
473
  except httpx.HTTPStatusError as e: logger.error(f"[{actor_name}] HTTP Status Error during API interaction for {url}: {e}"); return None
474
  except httpx.RequestError as e: logger.error(f"[{actor_name}] Request error during API interaction for {url}: {e}"); return None
 
446
  logger.debug(f"[{actor_name}] POST Request to {sync_items_endpoint} for {url}")
447
  response = await client.post(sync_items_endpoint, headers=headers, params=params, json=run_input)
448
  logger.debug(f"[{actor_name}] Received status code {response.status_code} for {url}")
449
+ logger.debug(f"[{actor_name}] Received status code {response.status_code} for {url}")
450
+
451
+ # <<< MODIFIED check for success: Accept 200 OR 201 >>>
452
+ if response.status_code in [200, 201]:
453
+ # Log if it was 201, as it's slightly unexpected for sync but apparently happens
454
+ if response.status_code == 201:
455
+ logger.info(f"[{actor_name}] Received status 201 (Created) from run-sync endpoint, processing results anyway.")
456
+
457
  try:
458
  results = response.json(); content = None
459
+ # --- The rest of the success processing logic stays the same ---
460
  if isinstance(results, list) and len(results) > 0:
461
  item = results[0]
462
  if "text" in item and isinstance(item["text"], str): content = item["text"]
 
466
  logger.warning(f"[{actor_name}] No 'text' or 'markdown' found, attempting to parse 'html' from result.")
467
  soup = BeautifulSoup(item["html"], DEFAULT_PARSER)
468
  content = " ".join(line.strip() for line in soup.get_text(separator='\n', strip=True).splitlines() if line.strip())
469
+
470
+ # Check content validity AFTER parsing
471
+ if content and isinstance(content, str) and len(content) > 30:
472
+ logger.info(f"[{actor_name}] Success via REST (Status {response.status_code}) for {url}. Length: {len(content)}")
473
+ return content.strip()
474
  else:
475
  content_len = len(content) if content and isinstance(content, str) else 0
476
+ logger.warning(f"[{actor_name}] Dataset item parsed (Status {response.status_code}) but text content empty/short/invalid format for {url}. Item keys: {list(item.keys())}. Length: {content_len}")
477
+ return None # Return None if content is bad, even if API status was 200/201
478
+ else:
479
+ # Handle empty dataset list '[]' which might be returned with 200 or 201
480
+ logger.warning(f"[{actor_name}] Actor call successful (Status {response.status_code}) but dataset was empty for {url}. Response: {results}")
481
+ return None
482
+ # --- End of success processing logic ---
483
+ except json.JSONDecodeError:
484
+ logger.error(f"[{actor_name}] Failed JSON decode. Status:{response.status_code}. Resp:{response.text[:200]}");
485
+ return None
486
+ except Exception as e:
487
+ logger.error(f"[{actor_name}] Error processing success response (Status {response.status_code}) for {url}: {e}", exc_info=True);
488
+ return None
489
+ # <<< The existing elif error handling remains the same >>>
490
  elif response.status_code == 400: logger.error(f"[{actor_name}] Bad Request (400) for {url}. Check run_input. Resp:{response.text[:200]}"); return None
491
  elif response.status_code == 401: logger.error(f"[{actor_name}] Auth error (401). Check token."); return None
492
  elif response.status_code == 404: logger.error(f"[{actor_name}] Endpoint/Actor Not Found (404). Actor: {actor_id} Resp:{response.text[:200]}"); return None
493
+ else: # Catches any other non-200/201 status
494
+ logger.error(f"[{actor_name}] Unexpected status {response.status_code} for {url}. Resp:{response.text[:200]}");
495
+ return None
496
  except httpx.TimeoutException as e: logger.error(f"[{actor_name}] Timeout during API interaction for {url}: {e}"); return None
497
  except httpx.HTTPStatusError as e: logger.error(f"[{actor_name}] HTTP Status Error during API interaction for {url}: {e}"); return None
498
  except httpx.RequestError as e: logger.error(f"[{actor_name}] Request error during API interaction for {url}: {e}"); return None