Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -71,7 +71,8 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
|
|
71 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
72 |
|
73 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
74 |
-
|
|
|
75 |
|
76 |
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
77 |
if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
|
@@ -83,7 +84,7 @@ if not WEBHOOK_SECRET: logger.info("Optional secret 'WEBHOOK_SECRET' not found.
|
|
83 |
|
84 |
logger.info("Secret loading and configuration check finished.")
|
85 |
logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
|
86 |
-
logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}")
|
87 |
|
88 |
_apify_token_exists = bool(APIFY_API_TOKEN)
|
89 |
|
@@ -109,6 +110,7 @@ def extract_youtube_id(url):
|
|
109 |
else: logger.warning(f"Could not extract YT ID from {url}"); return None
|
110 |
|
111 |
# --- Content Fetching Functions ---
|
|
|
112 |
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
|
113 |
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
|
114 |
try:
|
@@ -160,74 +162,69 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
160 |
|
161 |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
|
162 |
"""Fetches YouTube transcript using Apify REST API (async start + poll + dataset fetch)."""
|
163 |
-
global APIFY_ACTOR_ID
|
164 |
if not video_url: logger.error("[Apify Async] No video_url provided"); return None
|
165 |
if not api_token: logger.error("[Apify Async] API token missing."); return None
|
166 |
logger.info(f"[Apify Async] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
|
167 |
|
168 |
start_run_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/runs"
|
169 |
params_base = {"token": api_token}
|
|
|
|
|
170 |
payload = {
|
171 |
-
"
|
172 |
-
"
|
173 |
-
"
|
|
|
|
|
|
|
174 |
}
|
175 |
headers = {"Content-Type": "application/json"}
|
176 |
|
177 |
run_id: Optional[str] = None
|
178 |
dataset_id: Optional[str] = None
|
179 |
-
max_wait_seconds = 120
|
180 |
-
poll_interval = 5
|
181 |
|
182 |
try:
|
183 |
-
async with httpx.AsyncClient(timeout=30.0) as client:
|
184 |
# 1. Start the run
|
185 |
-
logger.debug(f"[Apify Async] Starting actor run for {video_url}")
|
186 |
response_start = await client.post(start_run_endpoint, headers=headers, params=params_base, json=payload)
|
187 |
logger.debug(f"[Apify Async] Start run status: {response_start.status_code}")
|
188 |
|
189 |
-
if response_start.status_code == 201:
|
190 |
try:
|
191 |
run_data = response_start.json().get("data", {})
|
192 |
run_id = run_data.get("id")
|
193 |
dataset_id = run_data.get("defaultDatasetId")
|
194 |
-
if not run_id or not dataset_id:
|
195 |
-
logger.error(f"[Apify Async] Started run but missing runId or datasetId. Data: {run_data}")
|
196 |
-
return None
|
197 |
logger.info(f"[Apify Async] Run started. Run ID: {run_id}, Dataset ID: {dataset_id}")
|
198 |
-
except Exception as e:
|
199 |
-
logger.error(f"[Apify Async] Error parsing start run response: {e}. Response: {response_start.text[:200]}", exc_info=True)
|
200 |
-
return None
|
201 |
else:
|
202 |
-
|
|
|
|
|
|
|
|
|
203 |
return None
|
204 |
|
205 |
# 2. Poll for run completion
|
206 |
run_status_endpoint = f"https://api.apify.com/v2/actor-runs/{run_id}"
|
207 |
-
elapsed_time = 0
|
208 |
-
final_status = None
|
209 |
while elapsed_time < max_wait_seconds:
|
210 |
-
await asyncio.sleep(poll_interval)
|
211 |
-
elapsed_time += poll_interval
|
212 |
logger.debug(f"[Apify Async] Polling status for run {run_id} ({elapsed_time}s elapsed)")
|
213 |
try:
|
214 |
response_status = await client.get(run_status_endpoint, params=params_base)
|
215 |
if response_status.status_code == 200:
|
216 |
-
status_data = response_status.json().get("data", {})
|
217 |
-
final_status = status_data.get("status")
|
218 |
logger.debug(f"[Apify Async] Run status: {final_status}")
|
219 |
-
if final_status in ["SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT"]:
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
except Exception as poll_err:
|
225 |
-
logger.error(f"[Apify Async] Error polling run status {run_id}: {poll_err}")
|
226 |
-
await asyncio.sleep(poll_interval * 2) # Wait longer on error
|
227 |
-
|
228 |
-
if final_status != "SUCCEEDED":
|
229 |
-
logger.warning(f"[Apify Async] Run {run_id} did not succeed. Final status: {final_status}")
|
230 |
-
return None
|
231 |
|
232 |
# 3. Fetch dataset items
|
233 |
logger.info(f"[Apify Async] Run {run_id} succeeded. Fetching items from dataset {dataset_id}")
|
@@ -235,14 +232,27 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
|
|
235 |
params_dataset = {"token": api_token, "format": "json", "limit": 5}
|
236 |
response_dataset = await client.get(dataset_endpoint, params=params_dataset)
|
237 |
logger.debug(f"[Apify Async] Dataset fetch status: {response_dataset.status_code}")
|
238 |
-
response_dataset.raise_for_status()
|
239 |
|
240 |
results = response_dataset.json()
|
241 |
if isinstance(results, list) and len(results) > 0:
|
242 |
item = results[0]
|
243 |
-
|
244 |
-
|
245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
if content and isinstance(content, str):
|
247 |
logger.info(f"[Apify Async] Success via ASYNC REST for {video_url}. Length: {len(content)}")
|
248 |
return content.strip()
|
@@ -254,8 +264,8 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
|
|
254 |
except httpx.RequestError as e: logger.error(f"[Apify Async] Request error during API interaction for {video_url}: {e}"); return None
|
255 |
except Exception as e: logger.error(f"[Apify Async] Unexpected error during Apify Async REST call for {video_url}: {e}", exc_info=True); return None
|
256 |
|
|
|
257 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
258 |
-
# This function remains the same, calling the updated helper functions
|
259 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
260 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
261 |
logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
|
@@ -279,17 +289,16 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
|
|
279 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
280 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
281 |
if transcript_text is None:
|
282 |
-
logger.info("[Fallback YT 2] Trying Apify REST API (Async)...")
|
283 |
if APIFY_API_TOKEN:
|
284 |
-
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
|
285 |
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify ASYNC REST for {video_url}"); return transcript_text
|
286 |
else: logger.warning(f"[Fallback YT 2] Apify ASYNC REST failed or no content for {video_url}.")
|
287 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
288 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
289 |
-
return transcript_text
|
290 |
|
291 |
async def get_website_content(url: str) -> Optional[str]:
|
292 |
-
# This function remains the same
|
293 |
if not url: logger.error("get_website_content: No URL"); return None
|
294 |
logger.info(f"[Primary Web] Fetching website content for: {url}")
|
295 |
html_content = await fetch_url_content_for_scrape(url)
|
@@ -311,7 +320,6 @@ async def get_website_content(url: str) -> Optional[str]:
|
|
311 |
except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
|
312 |
|
313 |
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
314 |
-
# This function remains the same
|
315 |
if not url: logger.error("[Fallback Web API] No URL"); return None
|
316 |
if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
|
317 |
logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
|
@@ -338,8 +346,6 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
|
338 |
except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
|
339 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
340 |
|
341 |
-
# --- Summarization Function ---
|
342 |
-
# (generate_summary remains the same)
|
343 |
async def generate_summary(text: str, summary_type: str) -> str:
|
344 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
345 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
@@ -514,9 +520,7 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
|
|
514 |
if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
|
515 |
logger.error("Exception while handling an update:", exc_info=context.error)
|
516 |
|
517 |
-
|
518 |
# --- Bot Setup ---
|
519 |
-
# (setup_bot_config remains the same)
|
520 |
async def setup_bot_config() -> Application:
|
521 |
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
|
522 |
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
|
@@ -572,6 +576,7 @@ async def lifespan(app: Starlette):
|
|
572 |
else: logger.info("PTB application not initialized or failed.")
|
573 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
574 |
|
|
|
575 |
# --- Starlette Route Handlers ---
|
576 |
# (health_check and telegram_webhook remain the same)
|
577 |
async def health_check(request: Request) -> PlainTextResponse:
|
@@ -596,6 +601,7 @@ async def telegram_webhook(request: Request) -> Response:
|
|
596 |
except json.JSONDecodeError: logger.error("Webhook invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
|
597 |
except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
|
598 |
|
|
|
599 |
# --- Create Starlette ASGI Application ---
|
600 |
# (app definition remains the same)
|
601 |
# ASGI entry point: GET "/" is served by health_check, POST "/webhook" by
# telegram_webhook; startup/shutdown is delegated to the `lifespan` handler.
app = Starlette(
    routes=[
        Route("/", endpoint=health_check, methods=["GET"]),
        Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
    ],
    lifespan=lifespan,
    debug=False,
)
|
|
|
1 |
+
# main.py (Changing Default Apify Actor ID)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
71 |
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')
|
72 |
|
73 |
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free")
|
74 |
+
# *** FIX: Change default actor back to pocesar/youtube-scraper ***
|
75 |
+
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "pocesar/youtube-scraper")
|
76 |
|
77 |
if not TELEGRAM_TOKEN: logger.critical("❌ FATAL: TELEGRAM_TOKEN not found."); raise RuntimeError("Exiting: Telegram token missing.")
|
78 |
if not OPENROUTER_API_KEY: logger.error("❌ ERROR: OPENROUTER_API_KEY not found. Summarization will fail.")
|
|
|
84 |
|
85 |
logger.info("Secret loading and configuration check finished.")
|
86 |
logger.info(f"Using OpenRouter Model: {OPENROUTER_MODEL}")
|
87 |
+
logger.info(f"Using Apify Actor (via REST): {APIFY_ACTOR_ID}") # Will show pocesar now if default
|
88 |
|
89 |
_apify_token_exists = bool(APIFY_API_TOKEN)
|
90 |
|
|
|
110 |
else: logger.warning(f"Could not extract YT ID from {url}"); return None
|
111 |
|
112 |
# --- Content Fetching Functions ---
|
113 |
+
# (fetch_url_content_for_scrape, get_transcript_via_supadata remain the same)
|
114 |
async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
|
115 |
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
|
116 |
try:
|
|
|
162 |
|
163 |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
|
164 |
"""Fetches YouTube transcript using Apify REST API (async start + poll + dataset fetch)."""
|
165 |
+
global APIFY_ACTOR_ID # Uses the globally defined actor ID
|
166 |
if not video_url: logger.error("[Apify Async] No video_url provided"); return None
|
167 |
if not api_token: logger.error("[Apify Async] API token missing."); return None
|
168 |
logger.info(f"[Apify Async] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
|
169 |
|
170 |
start_run_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/runs"
|
171 |
params_base = {"token": api_token}
|
172 |
+
# Adjust payload based on the specific actor (pocesar vs karamelo) if necessary
|
173 |
+
# This payload looks generic enough for pocesar/youtube-scraper as well, but it may still need adjusting.
|
174 |
payload = {
|
175 |
+
"startUrls": [{"url": video_url}], # pocesar often uses startUrls
|
176 |
+
# "urls": [video_url], # karamelo used urls
|
177 |
+
"proxyConfiguration": {"useApifyProxy": True}, # Common setting
|
178 |
+
"subtitles": True, # Explicitly request if pocesar supports it like this
|
179 |
+
"maxResultStreams": 0, "maxResults": 1, # Limit results
|
180 |
+
# Check pocesar/youtube-scraper docs for exact options
|
181 |
}
|
182 |
headers = {"Content-Type": "application/json"}
|
183 |
|
184 |
run_id: Optional[str] = None
|
185 |
dataset_id: Optional[str] = None
|
186 |
+
max_wait_seconds = 120
|
187 |
+
poll_interval = 5
|
188 |
|
189 |
try:
|
190 |
+
async with httpx.AsyncClient(timeout=30.0) as client:
|
191 |
# 1. Start the run
|
192 |
+
logger.debug(f"[Apify Async] Starting actor run for {video_url} using actor {APIFY_ACTOR_ID}")
|
193 |
response_start = await client.post(start_run_endpoint, headers=headers, params=params_base, json=payload)
|
194 |
logger.debug(f"[Apify Async] Start run status: {response_start.status_code}")
|
195 |
|
196 |
+
if response_start.status_code == 201:
|
197 |
try:
|
198 |
run_data = response_start.json().get("data", {})
|
199 |
run_id = run_data.get("id")
|
200 |
dataset_id = run_data.get("defaultDatasetId")
|
201 |
+
if not run_id or not dataset_id: logger.error(f"[Apify Async] Started run but missing runId or datasetId. Data: {run_data}"); return None
|
|
|
|
|
202 |
logger.info(f"[Apify Async] Run started. Run ID: {run_id}, Dataset ID: {dataset_id}")
|
203 |
+
except Exception as e: logger.error(f"[Apify Async] Error parsing start run response: {e}. Response: {response_start.text[:200]}", exc_info=True); return None
|
|
|
|
|
204 |
else:
|
205 |
+
# Log specific error if available from Apify response
|
206 |
+
error_info = ""
|
207 |
+
try: error_info = response_start.json().get("error", {}).get("message", "")
|
208 |
+
except Exception: pass
|
209 |
+
logger.error(f"[Apify Async] Failed to start run. Status: {response_start.status_code}. Error: {error_info} Resp: {response_start.text[:200]}")
|
210 |
return None
|
211 |
|
212 |
# 2. Poll for run completion
|
213 |
run_status_endpoint = f"https://api.apify.com/v2/actor-runs/{run_id}"
|
214 |
+
elapsed_time = 0; final_status = None
|
|
|
215 |
while elapsed_time < max_wait_seconds:
|
216 |
+
await asyncio.sleep(poll_interval); elapsed_time += poll_interval
|
|
|
217 |
logger.debug(f"[Apify Async] Polling status for run {run_id} ({elapsed_time}s elapsed)")
|
218 |
try:
|
219 |
response_status = await client.get(run_status_endpoint, params=params_base)
|
220 |
if response_status.status_code == 200:
|
221 |
+
status_data = response_status.json().get("data", {}); final_status = status_data.get("status")
|
|
|
222 |
logger.debug(f"[Apify Async] Run status: {final_status}")
|
223 |
+
if final_status in ["SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT"]: break
|
224 |
+
else: logger.warning(f"[Apify Async] Non-200 status ({response_status.status_code}) polling run {run_id}."); await asyncio.sleep(poll_interval * 2)
|
225 |
+
except Exception as poll_err: logger.error(f"[Apify Async] Error polling run status {run_id}: {poll_err}"); await asyncio.sleep(poll_interval * 2)
|
226 |
+
|
227 |
+
if final_status != "SUCCEEDED": logger.warning(f"[Apify Async] Run {run_id} did not succeed. Final status: {final_status}"); return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
|
229 |
# 3. Fetch dataset items
|
230 |
logger.info(f"[Apify Async] Run {run_id} succeeded. Fetching items from dataset {dataset_id}")
|
|
|
232 |
params_dataset = {"token": api_token, "format": "json", "limit": 5}
|
233 |
response_dataset = await client.get(dataset_endpoint, params=params_dataset)
|
234 |
logger.debug(f"[Apify Async] Dataset fetch status: {response_dataset.status_code}")
|
235 |
+
response_dataset.raise_for_status()
|
236 |
|
237 |
results = response_dataset.json()
|
238 |
if isinstance(results, list) and len(results) > 0:
|
239 |
item = results[0]
|
240 |
+
# --- Adapt parsing for pocesar/youtube-scraper ---
|
241 |
+
# This actor often puts the transcript directly under a 'subtitles' key,
|
242 |
+
# possibly as a single string or sometimes structured.
|
243 |
+
content = item.get("subtitles") # Check 'subtitles' first
|
244 |
+
if not content: content = item.get("text") # Fallback check
|
245 |
+
if not content: content = item.get("transcript") # Another fallback
|
246 |
+
|
247 |
+
# If subtitles is a list of dicts (like karamelo's output sometimes)
|
248 |
+
if isinstance(content, list) and len(content) > 0 and isinstance(content[0], dict) and 'lines' in content[0]:
|
249 |
+
logger.info("[Apify Async] Processing structured subtitles format.")
|
250 |
+
content = " ".join(line.get("text", "") for line in content[0].get('lines', []) if line.get("text"))
|
251 |
+
elif isinstance(content, list): # Handle simple list of strings if found
|
252 |
+
logger.info("[Apify Async] Processing list of strings format.")
|
253 |
+
content = " ".join(content)
|
254 |
+
|
255 |
+
# Final check if we have a non-empty string
|
256 |
if content and isinstance(content, str):
|
257 |
logger.info(f"[Apify Async] Success via ASYNC REST for {video_url}. Length: {len(content)}")
|
258 |
return content.strip()
|
|
|
264 |
except httpx.RequestError as e: logger.error(f"[Apify Async] Request error during API interaction for {video_url}: {e}"); return None
|
265 |
except Exception as e: logger.error(f"[Apify Async] Unexpected error during Apify Async REST call for {video_url}: {e}", exc_info=True); return None
|
266 |
|
267 |
+
# (get_youtube_transcript, get_website_content, get_website_content_via_api, generate_summary remain the same)
|
268 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
|
|
269 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
270 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
271 |
logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
|
|
|
289 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
290 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
291 |
if transcript_text is None:
|
292 |
+
logger.info("[Fallback YT 2] Trying Apify REST API (Async)...")
|
293 |
if APIFY_API_TOKEN:
|
294 |
+
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
|
295 |
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify ASYNC REST for {video_url}"); return transcript_text
|
296 |
else: logger.warning(f"[Fallback YT 2] Apify ASYNC REST failed or no content for {video_url}.")
|
297 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
298 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
299 |
+
return transcript_text
|
300 |
|
301 |
async def get_website_content(url: str) -> Optional[str]:
|
|
|
302 |
if not url: logger.error("get_website_content: No URL"); return None
|
303 |
logger.info(f"[Primary Web] Fetching website content for: {url}")
|
304 |
html_content = await fetch_url_content_for_scrape(url)
|
|
|
320 |
except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
|
321 |
|
322 |
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
|
|
323 |
if not url: logger.error("[Fallback Web API] No URL"); return None
|
324 |
if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
|
325 |
logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
|
|
|
346 |
except httpx.RequestError as e: logger.error(f"[Fallback Web API] Request error connecting to urltotext.com API for {url}: {e}"); return None
|
347 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
348 |
|
|
|
|
|
349 |
async def generate_summary(text: str, summary_type: str) -> str:
|
350 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
351 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
|
|
520 |
if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
|
521 |
logger.error("Exception while handling an update:", exc_info=context.error)
|
522 |
|
|
|
523 |
# --- Bot Setup ---
|
|
|
524 |
async def setup_bot_config() -> Application:
|
525 |
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
|
526 |
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
|
|
|
576 |
else: logger.info("PTB application not initialized or failed.")
|
577 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
578 |
|
579 |
+
|
580 |
# --- Starlette Route Handlers ---
|
581 |
# (health_check and telegram_webhook remain the same)
|
582 |
async def health_check(request: Request) -> PlainTextResponse:
|
|
|
601 |
except json.JSONDecodeError: logger.error("Webhook invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
|
602 |
except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
|
603 |
|
604 |
+
|
605 |
# --- Create Starlette ASGI Application ---
|
606 |
# (app definition remains the same)
|
607 |
# ASGI entry point: GET "/" is served by health_check, POST "/webhook" by
# telegram_webhook; startup/shutdown is delegated to the `lifespan` handler.
app = Starlette(
    routes=[
        Route("/", endpoint=health_check, methods=["GET"]),
        Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
    ],
    lifespan=lifespan,
    debug=False,
)
|