# main.py (Revised: Apify 201 fix + Supadata verify=False TEST)
import os
import re
import logging
import asyncio
import json
from flask import Flask, request, Response  # For web server
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import (
    Application,
    CommandHandler,
    MessageHandler,
    filters,
    ContextTypes,
    CallbackQueryHandler,
    ApplicationBuilder  # Import ApplicationBuilder
)
from telegram.constants import ParseMode  # Import ParseMode explicitly
# Import specific libraries (Ensure these are covered in requirements.txt)
from youtube_transcript_api import YouTubeTranscriptApi
import requests
from bs4 import BeautifulSoup
# Only import ApifyClient if you might use it (i.e., have the secret)
_apify_token_exists = bool(os.environ.get('APIFY_API_TOKEN'))
if _apify_token_exists:
    from apify_client import ApifyClient
else:
    ApifyClient = None
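# ApifyClient stays None when APIFY_API_TOKEN is absent; the Apify code paths below check for this before use.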
# --- Logging Setup ---
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG  # Keep DEBUG
)
logging.getLogger("httpx").setLevel(logging.WARNING)
if ApifyClient: logging.getLogger("apify_client").setLevel(logging.WARNING)
logging.getLogger("telegram.ext").setLevel(logging.DEBUG)
logging.getLogger('telegram.bot').setLevel(logging.DEBUG)
logging.getLogger("urllib3").setLevel(logging.INFO)  # Reduce requests noise slightly
logging.getLogger('gunicorn.error').setLevel(logging.INFO)
logger = logging.getLogger(__name__)
logger.info("Logging configured (DEBUG level).")
# --- Environment Variable Loading ---
logger.info("Attempting to load secrets from environment variables...")
def get_secret(secret_name):
    logger.debug(f"Attempting to read secret: {secret_name}")
    value = os.environ.get(secret_name)
    if value: logger.info(f"Secret '{secret_name}': Found (Value length: {len(value)})")
    else: logger.warning(f"Secret '{secret_name}': Not Found")
    return value
TELEGRAM_TOKEN = get_secret('TELEGRAM_TOKEN')
OPENROUTER_API_KEY = get_secret('OPENROUTER_API_KEY')
URLTOTEXT_API_KEY = get_secret('URLTOTEXT_API_KEY')
SUPADATA_API_KEY = get_secret('SUPADATA_API_KEY')
APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
logger.info("Secret loading attempt finished.")
# --- Bot Logic Functions ---
# Helper Functions
def is_youtube_url(url):
    """Checks if the URL is a valid YouTube video or shorts URL."""
    youtube_regex = r'(https?://)?(www\.)?(youtube\.com/(watch\?v=|shorts/)|youtu\.be/)([\w-]{11})'
    return bool(re.search(youtube_regex, url))
def extract_youtube_id(url):
    """Extracts the YouTube video ID from a URL."""
    youtube_id_regex = r'(?:youtube\.com/(?:watch\?v=|shorts/)|youtu\.be/)([\w-]{11})'
    match = re.search(youtube_id_regex, url)
    if match:
        return match.group(1)
    logger.warning(f"Could not extract YouTube ID from URL: {url}")
    return None
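# Illustrative example (hypothetical URL): extract_youtube_id("https://youtu.be/dQw4w9WgXcQ") -> "dQw4w9WgXcQ"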
# Supadata Transcript Fetching
async def get_transcript_via_supadata(video_id: str, api_key: str):
    """Fetches YouTube transcript via Supadata API."""
    if not video_id: logger.error("[Supadata] get_transcript_via_supadata called with no video_id"); return None
    if not api_key: logger.error("[Supadata] API key is missing."); return None
    logger.info(f"[Supadata] Attempting fetch for video ID: {video_id}")
    api_endpoint = "https://api.supadata.net/v1/youtube/transcript"
    params = {"videoId": video_id, "format": "text"}
    headers = {"X-API-Key": api_key}
    try:
        # ---!!! INSECURE TEST - DISABLES SSL VERIFICATION !!!---
        logger.warning("[Supadata] Making request with verify=False (INSECURE TEST)")
        response = await asyncio.to_thread(requests.get, api_endpoint, headers=headers, params=params, timeout=30, verify=False)
        # ---!!! END INSECURE TEST --- (Remember to remove verify=False later) ---
        logger.debug(f"[Supadata] Received status code {response.status_code} for {video_id}")
        if response.status_code == 200:
            # (Rest of the success handling code remains the same)
            try:
                data = response.json()
                content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
                if content and isinstance(content, str):
                    logger.info(f"[Supadata] Successfully fetched transcript for {video_id}. Length: {len(content)}")
                    return content.strip()
                else:
                    logger.warning(f"[Supadata] API success but content empty/invalid for {video_id}. Response: {data}")
                    return None
            except json.JSONDecodeError:
                if response.text:
                    logger.info(f"[Supadata] Successfully fetched transcript (plain text) for {video_id}. Length: {len(response.text)}")
                    return response.text.strip()
                else:
                    logger.error(f"[Supadata] Failed to decode JSON response (and no text body) for {video_id}. Response: {response.text[:200]}...")
                    return None
            except Exception as e:
                logger.error(f"[Supadata] Error processing successful response for {video_id}: {e}", exc_info=True)
                return None
        elif response.status_code in [401, 403]:
            logger.error(f"[Supadata] Authentication error ({response.status_code}). Check API key.")
            return None
        elif response.status_code == 404:
            logger.warning(f"[Supadata] Transcript not found ({response.status_code}) for {video_id}.")
            return None
        else:
            logger.error(f"[Supadata] Unexpected status code {response.status_code} for {video_id}. Response: {response.text[:200]}...")
            return None
    except requests.exceptions.Timeout:
        logger.error(f"[Supadata] Timeout error connecting to API for {video_id}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"[Supadata] Request error connecting to API for {video_id}: {e}")
        # Log the specific SSLError if verify=False wasn't the only issue
        if isinstance(e, requests.exceptions.SSLError):
            logger.error(f"[Supadata] SSL Error details: {e}")
        return None
    except Exception as e:
        logger.error(f"[Supadata] Unexpected error during API call for {video_id}: {e}", exc_info=True)
        return None
# Apify Transcript Fetching
async def get_transcript_via_apify(video_url: str, api_token: str):
    """Fetches YouTube transcript via Apify API."""
    if not video_url: logger.error("[Apify] get_transcript_via_apify called with no video_url"); return None
    if not api_token: logger.error("[Apify] API token is missing."); return None
    if not ApifyClient: logger.error("[Apify] ApifyClient not available/imported."); return None
    logger.info(f"[Apify] Attempting fetch for URL: {video_url}")
    actor_id = "karamelo~youtube-transcripts"
    api_endpoint = f"https://api.apify.com/v2/acts/{actor_id}/run-sync-get-dataset-items"
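    # This endpoint runs the actor synchronously and returns its default dataset items directly.
    # As noted in the header ("Apify 201 fix"), it may answer 201 Created rather than 200 OK, hence the widened status check below.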
    params = {"token": api_token}
    payload = json.dumps({
        "urls": [video_url],
        "outputFormat": "singleStringText",
        "maxRetries": 5,
        "channelHandleBoolean": False,
        "channelNameBoolean": False,
        "datePublishedBoolean": False,
        "relativeDateTextBoolean": False,
    })
    headers = {"Content-Type": "application/json"}
    try:
        logger.debug(f"[Apify] Sending request to run actor {actor_id} synchronously for {video_url}")
        response = await asyncio.to_thread(requests.post, api_endpoint, headers=headers, params=params, data=payload, timeout=90)
        logger.debug(f"[Apify] Received status code {response.status_code} for {video_url}")
        # --- MODIFIED STATUS CODE CHECK ---
        if response.status_code in [200, 201]:  # Accept 200 OK or 201 Created
            # --- END MODIFIED STATUS CODE CHECK ---
            try:
                results = response.json()
                if isinstance(results, list) and len(results) > 0:
                    item = results[0]
                    content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
                    if not content and item.get("captions") and isinstance(item["captions"], list):
                        logger.info("[Apify] Processing 'captions' format.")
                        content = " ".join(cap.get("text", "") for cap in item["captions"] if cap.get("text"))
                    if content and isinstance(content, str):
                        logger.info(f"[Apify] Successfully fetched transcript for {video_url} (Status: {response.status_code}). Length: {len(content)}")
                        return content.strip()
                    else:
                        logger.warning(f"[Apify] Actor run successful ({response.status_code}) but content not found/empty for {video_url}. Item: {item}")
                        return None
                else:
                    logger.warning(f"[Apify] Actor run successful ({response.status_code}) but dataset empty for {video_url}. Response: {results}")
                    return None
            except json.JSONDecodeError:
                logger.error(f"[Apify] Failed to decode JSON response for {video_url}. Status: {response.status_code}. Resp: {response.text[:200]}...")
                return None
            except Exception as e:
                logger.error(f"[Apify] Error processing successful response ({response.status_code}) for {video_url}: {e}", exc_info=True)
                return None
        elif response.status_code == 400:
            logger.error(f"[Apify] Bad Request (400) for {video_url}. Check payload. Response: {response.text[:200]}...")
            return None
        elif response.status_code == 401:
            logger.error("[Apify] Authentication error (401). Check API token.")
            return None
        else:  # Catch other non-200/201 codes here
            logger.error(f"[Apify] Unexpected status code {response.status_code} for {video_url}. Response: {response.text[:200]}...")
            return None
    except requests.exceptions.Timeout:
        logger.error(f"[Apify] Timeout error running actor for {video_url}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"[Apify] Request error running actor for {video_url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Apify] Unexpected error during Apify call for {video_url}: {e}", exc_info=True)
        return None
# (The remaining functions: get_youtube_transcript, get_website_content_via_requests,
# get_website_content_via_urltotext_api, generate_summary, start, help_command,
# handle_potential_url, handle_summary_type_callback, error_handler, setup_bot,
# webhook, index, and the main execution block follow below, unchanged from the
# previous complete version.)
# Combined YouTube Transcript Function (with Fallbacks)
async def get_youtube_transcript(video_id: str, video_url: str, supadata_key: str | None, apify_token: str | None):
    """Fetches YouTube transcript using library, then Supadata, then Apify."""
    if not video_id: logger.error("get_youtube_transcript called with no video_id"); return None
    logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
    transcript_text = None
    # 1. Primary Method: youtube-transcript-api
    logger.info("[Primary YT] Attempting youtube-transcript-api...")
    try:
        transcript_list = await asyncio.to_thread(
            YouTubeTranscriptApi.get_transcript,
            video_id,
            languages=['en', 'en-GB', 'en-US']
        )
        if transcript_list:
            transcript_text = " ".join([item['text'] for item in transcript_list if 'text' in item])
            if transcript_text:
                logger.info(f"[Primary YT] Successfully fetched transcript via library for {video_id} (length: {len(transcript_text)})")
                return transcript_text
            else:
                logger.warning(f"[Primary YT] Joined transcript text is empty for {video_id}")
                transcript_text = None
        else:
            logger.warning(f"[Primary YT] Transcript list empty for {video_id}")
            transcript_text = None
    except Exception as e:
        logger.warning(f"[Primary YT] Error getting transcript via library for {video_id}: {e}")
        if "YouTube is blocking requests" in str(e): logger.warning("[Primary YT] IP likely blocked by YouTube.")
        elif "No transcript found" in str(e): logger.warning(f"[Primary YT] No transcript found for {video_id}.")
        elif "disabled" in str(e): logger.warning(f"[Primary YT] Transcripts disabled for {video_id}.")
        transcript_text = None
    # 2. Fallback 1: Supadata API
    if transcript_text is None:
        logger.info("[Fallback YT 1] Primary method failed. Trying Supadata API...")
        if supadata_key:
            transcript_text = await get_transcript_via_supadata(video_id, supadata_key)
            if transcript_text:
                logger.info(f"[Fallback YT 1] Successfully fetched transcript via Supadata for {video_id}")
                return transcript_text
            else:
                logger.warning(f"[Fallback YT 1] Supadata API failed or returned no content for {video_id}.")
        else:
            logger.warning("[Fallback YT 1] Supadata API key not available. Skipping.")
    # 3. Fallback 2: Apify API
    if transcript_text is None:
        logger.info("[Fallback YT 2] Primary & Supadata failed. Trying Apify API...")
        if apify_token:
            transcript_text = await get_transcript_via_apify(video_url, apify_token)
            if transcript_text:
                logger.info(f"[Fallback YT 2] Successfully fetched transcript via Apify for {video_url}")
                return transcript_text
            else:
                logger.warning(f"[Fallback YT 2] Apify API failed or returned no content for {video_url}.")
        else:
            logger.warning("[Fallback YT 2] Apify API token not available. Skipping.")
    # If all methods failed
    if transcript_text is None:
        logger.error(f"All methods failed to fetch transcript for video ID: {video_id}")
        return None
    return transcript_text
# Website Content via Requests/BS4 (Primary Method for Simplified Bot)
async def get_website_content_via_requests(url):
    """Attempts to scrape website content using requests/BeautifulSoup (Primary Method)."""
    if not url: logger.error("[Web Scraper - Requests/BS4] called with no URL"); return None
    logger.info(f"[Web Scraper - Requests/BS4] Fetching website content for: {url}")
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
        logger.debug(f"[Web Scraper - Requests/BS4] Sending request to {url}")
        response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=25, allow_redirects=True)
        response.raise_for_status()
        logger.debug(f"[Web Scraper - Requests/BS4] Received response {response.status_code} from {url}")
        content_type = response.headers.get('content-type', '').lower()
        if 'html' not in content_type:
            logger.warning(f"[Web Scraper - Requests/BS4] Non-HTML content type received from {url}: {content_type}")
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "textarea", "select", "option", "label", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio"]): element.extract()
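        # Heuristic: prefer a semantic main/article/content container, falling back to <body> when none is found.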
        main_content = soup.find('main') or soup.find('article') or soup.find(id='content') or soup.find(class_='content') or soup.find(id='main-content') or soup.find(class_='main-content') or soup.find(role='main')
        target_element = main_content if main_content else soup.body
        if not target_element:
            logger.warning(f"[Web Scraper - Requests/BS4] Could not find body or main content container for parsing {url}")
            return None
        lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
        text = "\n".join(lines)
        if not text or len(text) < 50:
            logger.warning(f"[Web Scraper - Requests/BS4] Extracted text is very short or empty after cleaning for {url} (Length: {len(text)})")
        logger.info(f"[Web Scraper - Requests/BS4] Successfully scraped content for {url} (final length: {len(text)})")
        return text
    except requests.exceptions.Timeout: logger.error(f"[Web Scraper - Requests/BS4] Timeout error scraping website: {url}"); return None
    except requests.exceptions.TooManyRedirects: logger.error(f"[Web Scraper - Requests/BS4] Too many redirects error scraping website: {url}"); return None
    except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - Requests/BS4] Request error scraping website {url}: {e}"); return None
    except Exception as e: logger.error(f"[Web Scraper - Requests/BS4] Error scraping or parsing website {url}: {e}", exc_info=True); return None
# Website Content via URLToText API (Fallback Method)
async def get_website_content_via_urltotext_api(url: str, api_key: str):
    """Fetches website content using the URLToText API (Fallback)."""
    if not url: logger.error("[Web Scraper - URLToText API] called with no URL"); return None
    if not api_key: logger.error("[Web Scraper - URLToText API] API key is missing."); return None
    logger.info(f"[Web Scraper - URLToText API] Attempting to fetch content for: {url}")
    api_endpoint = "https://urltotext.com/api/v1/urltotext/"
    payload = json.dumps({ "url": url, "output_format": "text", "extract_main_content": True, "render_javascript": True, "residential_proxy": False })
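    # Payload flags are passed through to URLToText as named; their exact semantics (JS rendering, main-content extraction) are assumed from the field names, so consult the URLToText docs.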
    headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json" }
    try:
        logger.debug(f"[Web Scraper - URLToText API] Sending request for {url}")
        response = await asyncio.to_thread(requests.post, api_endpoint, headers=headers, data=payload, timeout=45)
        logger.debug(f"[Web Scraper - URLToText API] Received status code {response.status_code} for {url}")
        if response.status_code == 200:
            try:
                data = response.json()
                content = data.get("data", {}).get("content")
                credits = data.get("credits_used", "N/A")
                warning = data.get("data", {}).get("warning")
                if warning: logger.warning(f"[Web Scraper - URLToText API] Warning for {url}: {warning}")
                if content: logger.info(f"[Web Scraper - URLToText API] Successfully fetched content via API for {url}. Length: {len(content)}. Credits: {credits}"); return content.strip()
                else: logger.warning(f"[Web Scraper - URLToText API] API returned success but content was empty for {url}. Response: {data}"); return None
            except json.JSONDecodeError: logger.error(f"[Web Scraper - URLToText API] Failed to decode JSON response for {url}. Response: {response.text[:500]}..."); return None
            except Exception as e: logger.error(f"[Web Scraper - URLToText API] Error processing successful API response for {url}: {e}", exc_info=True); return None
        elif response.status_code in [400, 402, 422, 500]: logger.error(f"[Web Scraper - URLToText API] Error {response.status_code} from API for {url}. Response: {response.text[:200]}..."); return None
        else: logger.error(f"[Web Scraper - URLToText API] Unexpected status {response.status_code} from API for {url}. Response: {response.text[:200]}..."); return None
    except requests.exceptions.Timeout: logger.error(f"[Web Scraper - URLToText API] Timeout error connecting to API for {url}"); return None
    except requests.exceptions.RequestException as e: logger.error(f"[Web Scraper - URLToText API] Request error connecting to API for {url}: {e}"); return None
    except Exception as e: logger.error(f"[Web Scraper - URLToText API] Unexpected error during API call for {url}: {e}", exc_info=True); return None
# DeepSeek Summary Function (via OpenRouter)
async def generate_summary(text: str, summary_type: str, api_key: str) -> str:
    """Generates summary using DeepSeek via OpenRouter API."""
    logger.info(f"Generating {summary_type} summary using DeepSeek/OpenRouter. Input text length: {len(text)}")
    if not api_key: logger.error("OpenRouter API key was not provided."); return "Error: AI model config key missing."
    openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
    model_name = "deepseek/deepseek-chat:free"
    if summary_type == "paragraph": prompt = "..."  # Keep prompt as before
    else: prompt = """..."""  # Keep prompt as before
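    # Guard against oversized inputs: very long transcripts or pages are truncated below before being sent to the model.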
    MAX_INPUT_LENGTH = 500000
    if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input text ({len(text)}) > limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Truncated)"
    full_prompt = f"{prompt}\n\n{text}"
    headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "HTTP-Referer": "https://huggingface.co/spaces/", "X-Title": "Telegram Summary Bot (HF Space)"}
    payload = json.dumps({ "model": model_name, "messages": [{"role": "user", "content": full_prompt}]})
    try:
        logger.debug(f"Sending request to OpenRouter ({model_name})...")
        response = await asyncio.to_thread(requests.post, openrouter_api_endpoint, headers=headers, data=payload, timeout=60)
        logger.debug(f"Received status code {response.status_code} from OpenRouter.")
        if response.status_code == 200:
            try:
                data = response.json()
                if data.get("choices") and data["choices"]:
                    message = data["choices"][0].get("message")
                    if message and message.get("content"):
                        summary = message["content"].strip()
                        logger.info(f"Success generating summary. Len: {len(summary)}")
                        return summary
                    else: logger.warning(f"OpenRouter success but empty content. Resp: {data}"); return "Sorry, AI model returned empty summary."
                else:
                    if data.get("error"): logger.error(f"OpenRouter API Error: {data['error']}")
                    else: logger.error(f"Unexpected choices structure: {data.get('choices')}. Resp: {data}")
                    return "Sorry, could not parse AI response (choices/error)."
            except json.JSONDecodeError: logger.error(f"Failed JSON decode from OpenRouter. Status: {response.status_code}. Resp: {response.text[:500]}..."); return "Sorry, failed to understand AI response."
            except Exception as e: logger.error(f"Error processing OpenRouter success resp: {e}", exc_info=True); return "Sorry, error processing AI response."
        elif response.status_code == 401: logger.error("OpenRouter API key invalid (401). Check HF Secrets."); return "Error: AI model config key invalid."
        elif response.status_code == 402: logger.error("OpenRouter Payment Required (402). Check OpenRouter account."); return "Sorry, issue with AI service limits/payment."
        elif response.status_code == 429: logger.warning("OpenRouter Rate Limit (429)."); return "Sorry, AI model busy. Try again."
        elif response.status_code == 500: logger.error(f"OpenRouter Internal Error (500). Resp: {response.text[:500]}..."); return "Sorry, AI model service error. Try again later."
        else:
            logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp: {response.text[:500]}...")
            try: error_data = response.json(); error_msg = error_data.get("error", {}).get("message", response.text[:100]); return f"Sorry, AI service error ({response.status_code}): {error_msg}"
            except Exception: return f"Sorry, AI service returned status {response.status_code}."
    except requests.exceptions.Timeout: logger.error("Timeout connecting to OpenRouter."); return "Sorry, request to AI model timed out."
    except requests.exceptions.RequestException as e: logger.error(f"Request error connecting to OpenRouter: {e}"); return "Sorry, error connecting to AI model service."
    except Exception as e: logger.error(f"Unexpected error in generate_summary: {e}", exc_info=True); return "Sorry, unexpected error generating summary."
# --- Telegram Bot Handlers ---
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    user = update.effective_user; logger.info(f"User {user.id} ({user.username or 'NoUsername'}) used /start.")
    mention = user.mention_html() if user.username else user.first_name
    await update.message.reply_html(f"👋 Hello {mention}! I can summarize YouTube links or website URLs.\n\nJust send me a link anytime!")
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    logger.info(f"User {update.effective_user.id} used /help.")
    await update.message.reply_text("...", parse_mode=ParseMode.MARKDOWN)  # Keep help text
async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    if not update.message or not update.message.text: return
    url = update.message.text.strip(); user = update.effective_user
    logger.info(f"User {user.id} ({user.username or 'NoUsername'}) sent potential URL: {url}")
    if not (url.startswith('http://') or url.startswith('https://')) or '.' not in url[8:]: logger.debug(f"Ignoring non-URL: {url}"); return
    context.user_data['url_to_summarize'] = url; logger.debug(f"Stored URL '{url}' for user {user.id}")
    keyboard = [[InlineKeyboardButton("Paragraph", callback_data="paragraph"), InlineKeyboardButton("Points", callback_data="points")]]
    reply_markup = InlineKeyboardMarkup(keyboard)
    await update.message.reply_text(f"Link detected:\n{url}\n\nChoose summary type:", reply_markup=reply_markup, disable_web_page_preview=True)
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    query = update.callback_query; await query.answer()
    summary_type = query.data; user = update.effective_user or query.from_user
    url = context.user_data.get('url_to_summarize')
    logger.info(f"User {user.id} chose '{summary_type}' for URL '{url}'.")
    if not url: logger.warning(f"User {user.id} pressed button, NO URL in context."); await query.edit_message_text(text="Context lost. Send link again."); return
    context.user_data.pop('url_to_summarize', None); logger.debug(f"Cleared URL {url} for user {user.id}")
    current_openrouter_key = os.environ.get('OPENROUTER_API_KEY'); current_urltotext_key = os.environ.get('URLTOTEXT_API_KEY')
    current_supadata_key = os.environ.get('SUPADATA_API_KEY'); current_apify_token = os.environ.get('APIFY_API_TOKEN')
    if not current_openrouter_key: logger.error("OpenRouter key missing."); await context.bot.send_message(chat_id=user.id, text="Error: AI config missing."); await query.delete_message(); return
    processing_message = f"Working on '{summary_type}' summary for:\n{url}\n..."; message_to_delete_later = None
    try: await query.edit_message_text(processing_message); logger.debug(f"Edited message query {query.id}")
    except Exception as e: logger.warning(f"Could not edit message {query.id}: {e}. Sending new."); message_to_delete_later = await context.bot.send_message(chat_id=user.id, text=processing_message)
    content = None; user_feedback_message = None; success = False; is_youtube = is_youtube_url(url)
    try:
        logger.debug(f"Sending 'typing' action for chat {user.id}"); await context.bot.send_chat_action(chat_id=user.id, action='typing')
        if is_youtube:
            video_id = extract_youtube_id(url)
            if video_id:
                logger.info(f"Fetching YT transcript: {video_id}"); content = await get_youtube_transcript(video_id, url, current_supadata_key, current_apify_token)
                user_feedback_message = None if content else "Sorry, couldn't get YT transcript."
                logger.info(f"YT transcript fetch done. Found: {bool(content)}")
            else: user_feedback_message = "Sorry, couldn't parse YT URL."
        else:
            logger.info(f"Scraping website (Requests/BS4): {url}"); content = await get_website_content_via_requests(url)
            if content: logger.info("Website scrape (Requests/BS4) OK."); user_feedback_message = None
            else:
                logger.warning(f"Website scrape failed for {url}. Trying URLToText API.")
                if current_urltotext_key:
                    await context.bot.send_chat_action(chat_id=user.id, action='typing'); content = await get_website_content_via_urltotext_api(url, current_urltotext_key)
                    if content: logger.info("URLToText API scrape OK."); user_feedback_message = None
                    else: user_feedback_message = "Sorry, couldn't fetch web content (both methods)."
                else: user_feedback_message = "Sorry, couldn't fetch web content (fallback not configured)."
        if content:
            logger.info("Content found, generating summary."); await context.bot.send_chat_action(chat_id=user.id, action='typing')
            summary = await generate_summary(content, summary_type, current_openrouter_key)
            if summary.startswith("Error:") or summary.startswith("Sorry,"): user_feedback_message = summary; logger.warning(f"Summary generation failed: {summary}")
            else: logger.info("Summary generated OK. Sending."); await context.bot.send_message(chat_id=user.id, text=summary, parse_mode=ParseMode.MARKDOWN, disable_web_page_preview=True); success = True; user_feedback_message = None
        elif not user_feedback_message: user_feedback_message = "Sorry, couldn't retrieve content."
        if user_feedback_message and not success: logger.warning(f"Sending failure feedback: {user_feedback_message}"); await context.bot.send_message(chat_id=user.id, text=user_feedback_message)
    except Exception as e: logger.error(f"Unexpected error in callback processing: {e}", exc_info=True); await context.bot.send_message(chat_id=user.id, text="Oops! Internal error.")
    finally:
        logger.debug("Cleaning up status message...")
        try:
            if message_to_delete_later: await context.bot.delete_message(chat_id=user.id, message_id=message_to_delete_later.message_id); logger.debug("Deleted separate status msg.")
            elif query: await query.delete_message(); logger.debug(f"Deleted original message query {query.id}.")
        except Exception as del_e: logger.warning(f"Could not delete status/button message: {del_e}")
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None: logger.error(f"Exception while handling update: {context.error}", exc_info=context.error)
# --- Bot Application Setup Function ---
async def setup_bot():
    logger.info("Setting up Telegram Application...")
    if not TELEGRAM_TOKEN: logger.critical("Cannot initialize: TELEGRAM_TOKEN missing."); return None
    application = Application.builder().token(TELEGRAM_TOKEN).build()
    logger.info("Running application.initialize()..."); await application.initialize(); logger.info("Finished application.initialize().")
    application.add_handler(CommandHandler("start", start)); application.add_handler(CommandHandler("help", help_command))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url)); application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    application.add_error_handler(error_handler); logger.info("Telegram handlers registered.")
    return application
logger.info("Running bot setup..."); ptb_app = asyncio.run(setup_bot()); logger.info(f"Bot setup finished. App instance: {'OK' if ptb_app else 'Failed'}")
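# setup_bot() runs at import time so the PTB Application is initialized before the first webhook request arrives (e.g. when this module is served by gunicorn).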
# --- Flask App Setup ---
app = Flask(__name__); logger.info("Flask app created.")
# --- Webhook Endpoint ---
# NOTE: the '/webhook' route path below is an assumption; it must match the URL registered with Telegram via setWebhook.
@app.route('/webhook', methods=['POST'])
async def webhook() -> Response:
    logger.info("Webhook request received...")
    if not ptb_app: logger.error("PTB App not initialized."); return Response('Bot not configured', status=500)
    if request.is_json:
        try:
            update = Update.de_json(request.get_json(), ptb_app.bot); logger.debug(f"Processing update ID: {update.update_id}")
            logger.debug("Directly awaiting process_update..."); await ptb_app.process_update(update); logger.debug("Finished awaiting process_update.")
            return Response('ok', status=200)
        except json.JSONDecodeError: logger.error("Failed JSON decode from Telegram."); return Response('Bad Request', status=400)
        except Exception as e: logger.error(f"Error processing update in webhook: {e}", exc_info=True); return Response('Internal Server Error', status=500)
    else: logger.warning("Received non-JSON request to webhook."); return Response('Bad Request', status=400)
@app.route('/')
def index(): logger.debug("Health check '/' accessed."); bot_status = "Initialized" if ptb_app else "FAILED Init"; return f"TG Bot Webhook Listener ({bot_status}) running."
# --- Main Execution Block ---
if __name__ == '__main__':
    if not ptb_app: logger.critical("Aborting local Flask start: PTB App failed init.")
    else: logger.info("Starting Flask server directly (local testing?)..."); port = int(os.environ.get('PORT', 5000)); app.run(host='0.0.0.0', port=port, debug=True)
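# Reminder: the public webhook URL (assumed '/webhook' above) must be registered with Telegram separately, e.g. via the Bot API setWebhook call.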