Update main.py
main.py
CHANGED
@@ -1,4 +1,4 @@
-# main.py (
+# main.py (Full Code - Specific April 2025 Models: Llama 4 Scout & DeepSeek V3 Free)
import os
import re
import logging
@@ -99,16 +99,16 @@ APIFY_API_TOKEN = get_secret('APIFY_API_TOKEN')
RAPIDAPI_KEY = get_secret('RAPIDAPI_KEY')
WEBHOOK_SECRET = get_secret('WEBHOOK_SECRET')

-#
+# --- Model Configurations (Specific April 2025) ---
# Model Priority:
# 1. Groq Llama 4 Scout
# 2. Gemini 2.5 Pro Exp
# 3. Gemini 2.0 Flash
# 4. OpenRouter DeepSeek V3 Free
-GROQ_LLAMA4_MODEL = os.environ.get("GROQ_LLAMA4_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct") #
+GROQ_LLAMA4_MODEL = os.environ.get("GROQ_LLAMA4_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct") # Specific Llama 4 model
GEMINI_PRO_EXP_MODEL = os.environ.get("GEMINI_PRO_EXP_MODEL", "gemini-2.5-pro-exp-03-25")
GEMINI_FLASH_MODEL = os.environ.get("GEMINI_FLASH_MODEL", "gemini-2.0-flash-001")
-OPENROUTER_DEEPSEEK_MODEL = os.environ.get("OPENROUTER_DEEPSEEK_MODEL", "deepseek/deepseek-chat-v3-0324:free") #
+OPENROUTER_DEEPSEEK_MODEL = os.environ.get("OPENROUTER_DEEPSEEK_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Specific DeepSeek model

APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts") # YT Default
APIFY_CRAWLER_ACTOR_ID = "apify/website-content-crawler" # Scrape Fallback 4
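Note on the priority list above: generate_summary (further down in this diff) walks these four models in order and falls through on failure. A minimal sketch of that try-in-order pattern, with stub (name, caller) pairs standing in for the real _call_groq/_call_gemini/_call_openrouter helpers, might look like this:

# Sketch only; the (name, caller) pairs are assumed to be the real helper coroutines.
from typing import Awaitable, Callable, Dict, List, Optional, Tuple

async def summarise_with_fallbacks(
    text: str,
    summary_type: str,
    callers: List[Tuple[str, Callable[[str, str], Awaitable[Tuple[Optional[str], Optional[str]]]]]],
) -> str:
    errors: Dict[str, Optional[str]] = {}
    for name, caller in callers:
        summary, error = await caller(text, summary_type)
        if summary:
            return summary
        errors[name] = error  # record why this tier failed and fall through to the next model
    return "Sorry, all summarisation models failed: " + "; ".join(f"{k}: {v}" for k, v in errors.items())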
@@ -184,14 +184,9 @@ def extract_youtube_id(url):
if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
else: logger.warning(f"Could not extract YT ID from {url}"); return None

+
# --- Content Fetching Functions ---
-
-# fetch_url_content_for_scrape, get_website_content, get_website_content_via_api,
-# get_website_content_via_scrapers_proxy, get_website_content_via_ai_web_scraper,
-# _run_apify_actor_for_web_content, get_website_content_via_apify_crawler,
-# get_website_content_via_apify_text_scraper remain UNCHANGED. They are omitted here for brevity
-# but MUST be included in the final main.py file)
-# --- START OMITTED CONTENT FETCHING FUNCTIONS ---
+
async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
if not video_id: logger.error("[Supadata] No video_id provided"); return None
if not api_key: logger.error("[Supadata] API key missing."); return None
@@ -204,10 +199,10 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[str]:
logger.debug(f"[Supadata] Status code {response.status_code} for {video_id}")
if response.status_code == 200:
try:
-data = response.json() if response.text else None
+data = response.json() if response.text else None
content = None
if data: content = data if isinstance(data, str) else data.get("transcript") or data.get("text") or data.get("data")
-if not content and response.text: content = response.text
+if not content and response.text: content = response.text
if content and isinstance(content, str): logger.info(f"[Supadata] Success for {video_id}. Length: {len(content)}"); return content.strip()
else: logger.warning(f"[Supadata] Success but content empty/invalid for {video_id}. Response: {response.text[:200]}"); return None
except json.JSONDecodeError: logger.warning(f"[Supadata] Received 200 but failed JSON decode for {video_id}. Using raw text if available. Response: {response.text[:200]}"); return response.text.strip() if response.text else None
@@ -294,6 +289,7 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
return transcript_text

async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
+"""Directly fetches URL content using httpx."""
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Upgrade-Insecure-Requests': '1' }
try:
async with httpx.AsyncClient(follow_redirects=True, timeout=timeout, headers=headers) as client:
@@ -313,6 +309,7 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[str]:
return None

async def get_website_content(url: str) -> Optional[str]:
+"""Primary method: Fetches HTML directly and parses with BeautifulSoup."""
if not url: logger.error("[Web Scrape Primary] No URL provided"); return None
logger.info(f"[Web Scrape Primary] Attempting direct fetch and parse for: {url}")
html_content = await fetch_url_content_for_scrape(url)
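The new docstring on get_website_content names the primary approach: fetch the HTML directly, then parse it with BeautifulSoup. The parsing code itself is outside this hunk, so the following is only a generic sketch of visible-text extraction with bs4, not the function's actual body:

from bs4 import BeautifulSoup

def extract_visible_text(html: str) -> str:
    # Drop non-content tags, then collapse whitespace in the remaining text.
    soup = BeautifulSoup(html, "html.parser")
    for tag in soup(["script", "style", "noscript"]):
        tag.decompose()
    return " ".join(soup.get_text(separator=" ").split())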
@@ -336,6 +333,7 @@ async def get_website_content(url: str) -> Optional[str]:
except Exception as e: logger.error(f"[Web Scrape Primary] Unexpected error during parsing process for {url}: {e}", exc_info=True); return None

async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
+"""Fallback 1: Fetches website content using urltotext.com API."""
if not url: logger.error("[Web Scrape Fallback 1] No URL"); return None
if not api_key: logger.error("[Web Scrape Fallback 1] urltotext.com API key missing."); return None
logger.info(f"[Web Scrape Fallback 1] Attempting fetch for: {url} using urltotext.com API")
@@ -363,6 +361,7 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
except Exception as e: logger.error(f"[Web Scrape Fallback 1] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None

async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Optional[str]:
+"""Fallback 2: Fetches website content using Scraper's Proxy Parser via RapidAPI."""
if not url: logger.error("[Web Scrape Fallback 2] No URL provided"); return None
if not api_key: logger.error("[Web Scrape Fallback 2] RapidAPI key missing."); return None
logger.info(f"[Web Scrape Fallback 2] Attempting fetch for: {url} using Scraper's Proxy Parser API")
@@ -396,6 +395,7 @@ async def get_website_content_via_scrapers_proxy(url: str, api_key: str) -> Optional[str]:
except Exception as e: logger.error(f"[Web Scrape Fallback 2] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None

async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Optional[str]:
+"""Fallback 3: Fetches website content using AI Web Scraper via RapidAPI."""
if not url: logger.error("[Web Scrape Fallback 3] No URL provided"); return None
if not api_key: logger.error("[Web Scrape Fallback 3] RapidAPI key missing."); return None
logger.info(f"[Web Scrape Fallback 3] Attempting fetch for: {url} using AI Web Scraper API")
@@ -432,6 +432,7 @@ async def get_website_content_via_ai_web_scraper(url: str, api_key: str) -> Optional[str]:
except Exception as e: logger.error(f"[Web Scrape Fallback 3] Unexpected error during {api_host} API call for {url}: {e}", exc_info=True); return None

async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: str, actor_name: str) -> Optional[str]:
+"""Generic function to run an Apify actor and get text content."""
if not url: logger.error(f"[{actor_name}] No URL provided"); return None
if not api_token: logger.error(f"[{actor_name}] API token missing."); return None
logger.info(f"[{actor_name}] Attempting fetch for URL: {url} (Actor: {actor_id})")
@@ -474,11 +475,12 @@ async def _run_apify_actor_for_web_content(url: str, api_token: str, actor_id: str, actor_name: str) -> Optional[str]:
except Exception as e: logger.error(f"[{actor_name}] Unexpected error during {actor_name} call for {url}: {e}", exc_info=True); return None

async def get_website_content_via_apify_crawler(url: str, api_token: str) -> Optional[str]:
+"""Fallback 4: Fetches website content using Apify Website Content Crawler."""
return await _run_apify_actor_for_web_content( url=url, api_token=api_token, actor_id=APIFY_CRAWLER_ACTOR_ID, actor_name="Apify Crawler" )

async def get_website_content_via_apify_text_scraper(url: str, api_token: str) -> Optional[str]:
+"""Fallback 5: Fetches website content using Apify Text Scraper Free."""
return await _run_apify_actor_for_web_content( url=url, api_token=api_token, actor_id=APIFY_TEXT_SCRAPER_ACTOR_ID, actor_name="Apify Text Scraper" )
-# --- END OMITTED CONTENT FETCHING FUNCTIONS ---


# --- Summarization Functions (Using Specific April 2025 Models) ---
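Taken together, the docstrings added in this region describe a six-step scrape ladder: direct fetch, urltotext.com, Scraper's Proxy, AI Web Scraper, then two Apify actors. A hedged sketch of walking that ladder until one source returns text follows; URLTOTEXT_API_KEY is a hypothetical name for however the urltotext.com key is stored, while RAPIDAPI_KEY and APIFY_API_TOKEN appear earlier in the file:

# Sketch only; assumes the fetcher coroutines above and configured keys/tokens.
from typing import Optional

async def fetch_website_text(url: str) -> Optional[str]:
    attempts = [
        ("Primary direct fetch", lambda: get_website_content(url)),
        ("Fallback 1: urltotext.com", lambda: get_website_content_via_api(url, URLTOTEXT_API_KEY)),  # hypothetical key name
        ("Fallback 2: Scraper's Proxy", lambda: get_website_content_via_scrapers_proxy(url, RAPIDAPI_KEY)),
        ("Fallback 3: AI Web Scraper", lambda: get_website_content_via_ai_web_scraper(url, RAPIDAPI_KEY)),
        ("Fallback 4: Apify Crawler", lambda: get_website_content_via_apify_crawler(url, APIFY_API_TOKEN)),
        ("Fallback 5: Apify Text Scraper", lambda: get_website_content_via_apify_text_scraper(url, APIFY_API_TOKEN)),
    ]
    for name, attempt in attempts:
        content = await attempt()
        if content:
            logger.info(f"Content obtained via {name} for {url}")
            return content
    return None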
@@ -516,7 +518,6 @@ PROMPT_POINTS = (
"Here is the text to summarise:"
)

-# <<< Uses the specific GROQ_LLAMA4_MODEL constant >>>
async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
"""Internal function to call Groq API (Primary - Llama 4 Scout). Returns (summary, error_message)."""
global GROQ_API_KEY, GROQ_LLAMA4_MODEL, _groq_enabled
@@ -526,9 +527,7 @@ async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
logger.info(f"[Groq Primary] Generating {summary_type} summary using {GROQ_LLAMA4_MODEL}. Input length: {len(text)}")

prompt = PROMPT_PARAGRAPH if summary_type == "paragraph" else PROMPT_POINTS
-
-# Input Length Check for Llama 4 Scout (16k context? Be conservative)
-MAX_INPUT_LENGTH_GROQ = 40000 # ~13k tokens
+MAX_INPUT_LENGTH_GROQ = 40000 # ~13k tokens for 16k context
if len(text) > MAX_INPUT_LENGTH_GROQ:
logger.warning(f"[Groq Primary] Input length ({len(text)}) exceeds estimated limit ({MAX_INPUT_LENGTH_GROQ}). Truncating.");
text = text[:MAX_INPUT_LENGTH_GROQ] + "... (Content truncated)"
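The 40,000-character cap is a character-count proxy for a token budget (the comment estimates roughly 13k tokens, i.e. about 3 characters per token). As a standalone guard it reduces to something like:

def truncate_for_model(text: str, max_chars: int = 40000) -> str:
    # Crude pre-flight check: cap by characters as a rough proxy for tokens (~3 chars/token).
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "... (Content truncated)"

A tokenizer-based count would be more precise, but a character cap avoids pulling in another dependency.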
@@ -537,18 +536,14 @@ async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
try:
groq_client = Groq( api_key=GROQ_API_KEY, timeout=httpx.Timeout(120.0, connect=10.0) )
logger.info(f"[Groq Primary] Sending request to Groq ({GROQ_LLAMA4_MODEL})...")
-
chat_completion = await groq_client.chat.completions.create(
messages=[ { "role": "user", "content": full_prompt } ],
-model=GROQ_LLAMA4_MODEL,
-temperature=0.7, #
-max_tokens=2048, #
-top_p=1,
-stream=False,
-stop=None,
+model=GROQ_LLAMA4_MODEL,
+temperature=0.7, # Adjust from Groq default of 1 if needed
+max_tokens=2048, # Adjust from Groq default of 1024 if needed
+top_p=1, stream=False, stop=None,
)
logger.info("[Groq Primary] Received response from Groq.")
-
if chat_completion.choices and chat_completion.choices[0].message and chat_completion.choices[0].message.content:
summary = chat_completion.choices[0].message.content
finish_reason = chat_completion.choices[0].finish_reason
@@ -558,7 +553,6 @@ async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
logger.warning(f"[Groq Primary] Groq response structure unexpected or content empty. Response: {chat_completion.model_dump_json(indent=2)}")
finish_reason = chat_completion.choices[0].finish_reason if chat_completion.choices else 'N/A'
return None, f"Sorry, the primary AI model ({GROQ_LLAMA4_MODEL}) provided an empty or invalid response (Finish Reason: {finish_reason})."
-
except GroqError as ge:
logger.error(f"[Groq Primary] Groq API error: {ge.status_code} - {ge.message}", exc_info=False)
error_msg = f"Sorry, the primary AI service ({GROQ_LLAMA4_MODEL}) failed. API Error: {ge.status_code}."
@@ -575,7 +569,6 @@ async def _call_groq(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
logger.error(f"[Groq Primary] Unexpected error during Groq API call: {e}", exc_info=True);
return None, f"Sorry, an unexpected error occurred while using the primary AI service ({GROQ_LLAMA4_MODEL})."

-
async def _call_gemini(text: str, summary_type: str, model_name: str) -> Tuple[Optional[str], Optional[str]]:
"""Internal function to call Gemini API. Returns (summary, error_message)."""
global _gemini_api_enabled
@@ -639,7 +632,6 @@ async def _call_gemini(text: str, summary_type: str, model_name: str) -> Tuple[Optional[str], Optional[str]]:
error_msg = f"Sorry, an unexpected error occurred while using the AI service ({model_name})."
return None, error_msg

-# <<< Uses the specific OPENROUTER_DEEPSEEK_MODEL constant >>>
async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
"""Internal function to call OpenRouter API (Final Fallback - DeepSeek V3 Free). Returns (summary, error_message)."""
global OPENROUTER_API_KEY, OPENROUTER_DEEPSEEK_MODEL, _openrouter_fallback_enabled
@@ -655,17 +647,13 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
text = text[:MAX_INPUT_LENGTH_OR] + "... (Content truncated)"
full_prompt = f"{prompt}\n\n{text}"

-# Use the direct httpx call as before, ensuring the correct model name is in the payload
headers = {
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"Content-Type": "application/json",
-"HTTP-Referer": os.environ.get("YOUR_SITE_URL", "https://github.com/your-repo"),
-"X-Title": os.environ.get("YOUR_SITE_NAME", "TelegramSummariserBot")
-}
-payload = {
-"model": OPENROUTER_DEEPSEEK_MODEL, # <<< Use specific DeepSeek model name
-"messages": [{"role": "user", "content": full_prompt}],
+"HTTP-Referer": os.environ.get("YOUR_SITE_URL", "https://github.com/your-repo"),
+"X-Title": os.environ.get("YOUR_SITE_NAME", "TelegramSummariserBot")
}
+payload = { "model": OPENROUTER_DEEPSEEK_MODEL, "messages": [{"role": "user", "content": full_prompt}], }
openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
api_timeouts = httpx.Timeout(connect=10.0, read=60.0, write=10.0, pool=60.0)
response = None
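OpenRouter's chat completions endpoint follows the OpenAI request/response shape, which is why the payload above is just a model name and a messages list. A self-contained sketch of the same POST with httpx (the API key env var and example model name are taken from this file; the response parsing assumes the standard OpenAI-style shape):

import asyncio
import os
import httpx

async def openrouter_chat(prompt: str) -> str:
    # Single-turn chat completion against OpenRouter's OpenAI-compatible endpoint.
    headers = {
        "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "deepseek/deepseek-chat-v3-0324:free",
        "messages": [{"role": "user", "content": prompt}],
    }
    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0, connect=10.0)) as client:
        resp = await client.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]

# Example: print(asyncio.run(openrouter_chat("Summarise: ...")))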
@@ -675,7 +663,6 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
logger.info(f"[OpenRouter Fallback] Sending request to OpenRouter ({OPENROUTER_DEEPSEEK_MODEL}) with read timeout {api_timeouts.read}s...")
response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
logger.info(f"[OpenRouter Fallback] Received response from OpenRouter. Status code: {response.status_code}")
-
if response.status_code == 200:
try:
data = response.json()
@@ -701,12 +688,10 @@ async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
except Exception: pass
logger.error(f"[OpenRouter Fallback] Unexpected status {response.status_code}. Error: '{error_info}' Resp:{response.text[:500]}");
return None, f"Sorry, fallback AI service ({OPENROUTER_DEEPSEEK_MODEL}) returned unexpected status ({response.status_code})."
-
except httpx.TimeoutException as e: logger.error(f"[OpenRouter Fallback] Timeout error ({type(e)}) connecting/reading from OpenRouter API: {e}"); return None, f"Sorry, the fallback AI service ({OPENROUTER_DEEPSEEK_MODEL}) timed out."
except httpx.RequestError as e: logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}"); return None, "Sorry, there was an error connecting to the fallback AI model service."
except Exception as e: logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter call: {e}", exc_info=True); return None, "Sorry, an unexpected error occurred while using the fallback AI service."

-
async def generate_summary(text: str, summary_type: str) -> str:
"""
Generates summary using the specific model hierarchy (April 2025):
@@ -722,7 +707,7 @@ async def generate_summary(text: str, summary_type: str) -> str:
logger.info("[Summary Generation] Starting process with specific April 2025 model hierarchy.")
summary: Optional[str] = None
errors: Dict[str, Optional[str]] = {
-"Llama4Scout": None,
+"Llama4Scout": None,
"GeminiProExp": None,
"GeminiFlash": None,
"DeepSeekV3": None,
@@ -788,9 +773,6 @@ async def generate_summary(text: str, summary_type: str) -> str:


# --- Main Processing Logic ---
-# (process_summary_task remains UNCHANGED in its core logic, it correctly calls the updated generate_summary.
-# Omitted here for brevity, but MUST be included in the final file.)
-# --- START OMITTED process_summary_task ---
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
"""Handles the entire process: fetching content (with ALL fallbacks) and summarizing."""
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
@@ -903,13 +885,9 @@ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
except Exception as e: logger.warning(f"[Task {task_id}] Error closing background bot's client: {e}")
logger.info(f"[Task {task_id}] Task completed. Success: {success}")
-# --- END OMITTED process_summary_task ---


# --- Telegram Handlers ---
-# (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler
-# remain UNCHANGED. Omitted here for brevity, but include in final file.)
-# --- START OMITTED TELEGRAM HANDLERS ---
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
user = update.effective_user; mention = user.mention_html()
if not user or not update.message: return
@@ -958,7 +936,6 @@ async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
else:
logger.debug(f"Ignoring non-URL message from {user.id}: {url[:100]}")

-
async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
query = update.callback_query
if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
@@ -1016,14 +993,9 @@ async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Log Errors caused by Updates."""
logger.error("Exception while handling an update:", exc_info=context.error)
-# --- END OMITTED TELEGRAM HANDLERS ---


# --- Application Setup & Web Framework ---
-# (setup_bot_config, lifespan, telegram_webhook, app definition
-# remain UNCHANGED. health_check is modified below.
-# Omitted here for brevity, include in final file.)
-# --- START OMITTED APP SETUP/WEB FRAMEWORK (excluding health_check) ---
async def setup_bot_config() -> Application:
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
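setup_bot_config builds the python-telegram-bot Application, but its body is not shown in this hunk. For orientation only, in PTB v20+ the handlers named in this file are typically wired up roughly as follows; the command names and filter choices here are assumptions, not the file's actual code:

# Sketch only; assumes python-telegram-bot v20+ and the handler coroutines defined above.
from telegram.ext import Application, CallbackQueryHandler, CommandHandler, MessageHandler, filters

def build_application(token: str) -> Application:
    application = Application.builder().token(token).build()
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    # Plain text messages are screened for a URL by handle_potential_url
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_potential_url))
    application.add_handler(CallbackQueryHandler(handle_summary_type_callback))
    application.add_error_handler(error_handler)
    return application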
@@ -1093,32 +1065,6 @@ async def lifespan(app: Starlette):
else: logger.info("PTB application was not fully initialized or failed during startup. No shutdown actions needed.")
logger.info("ASGI Lifespan: Shutdown complete.")

-async def telegram_webhook(request: Request) -> Response:
-"""Handles incoming updates from Telegram."""
-global WEBHOOK_SECRET
-if not ptb_app: logger.error("Webhook received but PTB application not initialized."); return PlainTextResponse('Bot not initialized', status_code=503)
-if not ptb_app.running: logger.warning("Webhook received but PTB application not running."); return PlainTextResponse('Bot not running, cannot process update', status_code=503)
-if WEBHOOK_SECRET:
-token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
-if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'"); return Response(content="Invalid secret token", status_code=403)
-try:
-update_data = await request.json(); update = Update.de_json(data=update_data, bot=ptb_app.bot)
-logger.debug(f"Processing update_id: {update.update_id} via webhook"); await ptb_app.process_update(update)
-return Response(status_code=200)
-except json.JSONDecodeError: logger.error("Webhook received invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
-except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200)
-
-# --- Starlette App Definition ---
-# Note: health_check is defined below
-app = Starlette(
-debug=False,
-lifespan=lifespan,
-routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ]
-)
-logger.info("Starlette ASGI application created with health check and webhook routes.")
-# --- END OMITTED APP SETUP/WEB FRAMEWORK ---
-
-# <<< CHANGE: Updated health check response for specific models >>>
async def health_check(request: Request) -> PlainTextResponse:
"""Simple health check endpoint."""
global GROQ_LLAMA4_MODEL, GEMINI_PRO_EXP_MODEL, GEMINI_FLASH_MODEL, OPENROUTER_DEEPSEEK_MODEL, APIFY_ACTOR_ID
@@ -1158,6 +1104,32 @@ async def health_check(request: Request) -> PlainTextResponse:
f"Web Scrape 5/6 (Apify Actors): {'Enabled' if _apify_token_exists else 'Disabled'}"
)

+async def telegram_webhook(request: Request) -> Response:
+"""Handles incoming updates from Telegram."""
+global WEBHOOK_SECRET
+if not ptb_app: logger.error("Webhook received but PTB application not initialized."); return PlainTextResponse('Bot not initialized', status_code=503)
+if not ptb_app.running: logger.warning("Webhook received but PTB application not running."); return PlainTextResponse('Bot not running, cannot process update', status_code=503)
+if WEBHOOK_SECRET:
+token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
+if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'"); return Response(content="Invalid secret token", status_code=403)
+try:
+update_data = await request.json(); update = Update.de_json(data=update_data, bot=ptb_app.bot)
+logger.debug(f"Processing update_id: {update.update_id} via webhook"); await ptb_app.process_update(update)
+return Response(status_code=200)
+except json.JSONDecodeError: logger.error("Webhook received invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
+except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200)
+
+# --- Starlette App Definition ---
+app = Starlette(
+debug=False, # Keep False for production/Hugging Face
+lifespan=lifespan,
+routes=[
+Route("/", endpoint=health_check, methods=["GET"]),
+Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
+]
+)
+logger.info("Starlette ASGI application created with health check and webhook routes.")
+
# --- Development Server (if run directly) ---
if __name__ == '__main__':
import uvicorn
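The relocated telegram_webhook handler validates the X-Telegram-Bot-Api-Secret-Token header; Telegram only sends that header when the same secret was supplied to setWebhook. A hedged sketch of registering the webhook follows; the Space URL is a placeholder, and the token/secret names match this file even though the real code loads them via get_secret:

import os
import httpx

def register_webhook() -> None:
    # Point Telegram at this app's /webhook route and share the secret it should echo back.
    token = os.environ["TELEGRAM_TOKEN"]
    secret = os.environ["WEBHOOK_SECRET"]
    webhook_url = "https://your-space.hf.space/webhook"  # placeholder URL
    resp = httpx.post(
        f"https://api.telegram.org/bot{token}/setWebhook",
        json={"url": webhook_url, "secret_token": secret},
    )
    resp.raise_for_status()
    print(resp.json())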