diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -22,31 +22,40 @@ import traceback # For detailed error logging # Configure logging logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s' + level=logging.INFO, # Set to INFO, can change to DEBUG for more verbosity if needed + format='%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s' # Added filename/lineno ) logger = logging.getLogger(__name__) +logger.info("--- Starting App ---") # Log app start + # Login to Hugging Face Hub if token is available HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN') if HUGGINGFACE_TOKEN: + logger.info("HUGGINGFACE_TOKEN environment variable found.") try: login(token=HUGGINGFACE_TOKEN) logger.info("Successfully logged in to Hugging Face Hub.") except Exception as e: logger.error(f"Failed to login to Hugging Face Hub: {e}") + logger.error(traceback.format_exc()) +else: + logger.warning("HUGGINGFACE_TOKEN environment variable not set. Model loading might fail if private.") + class ModelManager: _instance = None def __new__(cls): if cls._instance is None: + logger.info("Creating new ModelManager instance.") cls._instance = super(ModelManager, cls).__new__(cls) cls._instance._initialized = False return cls._instance def __init__(self): if not self._initialized: + logger.info("Initializing ModelManager attributes.") self.tokenizer = None self.model = None self.text_pipeline = None # Renamed for clarity @@ -59,8 +68,9 @@ class ModelManager: @spaces.GPU(duration=120) # Increased duration for potentially long loads def initialize_llm(self): """Initialize LLM model with standard transformers""" + logger.info("Attempting to initialize LLM.") if self.llm_loading: - logger.info("LLM initialization already in progress.") + logger.info("LLM initialization already in progress. 
Skipping.") return True # Assume it will succeed or fail elsewhere if self.tokenizer and self.model and self.text_pipeline: logger.info("LLM already initialized.") @@ -68,9 +78,10 @@ class ModelManager: return True self.llm_loading = True + logger.info("Starting LLM initialization...") try: - # Use small model for ZeroGPU compatibility MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" + logger.info(f"Using LLM model: {MODEL_NAME}") logger.info("Loading LLM tokenizer...") self.tokenizer = AutoTokenizer.from_pretrained( @@ -78,11 +89,12 @@ class ModelManager: token=HUGGINGFACE_TOKEN, use_fast=True ) + logger.info("LLM tokenizer loaded.") if self.tokenizer.pad_token is None: + logger.info("Setting pad_token to eos_token for LLM tokenizer.") self.tokenizer.pad_token = self.tokenizer.eos_token - # Basic memory settings for ZeroGPU logger.info("Loading LLM model...") self.model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, @@ -90,13 +102,11 @@ class ModelManager: device_map="auto", torch_dtype=torch.float16, low_cpu_mem_usage=True, - # Optimizations for ZeroGPU - # max_memory={0: "4GB"}, # Removed for better auto handling initially offload_folder="offload", offload_state_dict=True ) + logger.info("LLM model loaded.") - # Create text generation pipeline logger.info("Creating LLM text generation pipeline...") self.text_pipeline = pipeline( "text-generation", @@ -106,20 +116,22 @@ class ModelManager: device_map="auto", max_length=1024 # Default max length ) + logger.info("LLM text generation pipeline created.") - logger.info("LLM initialized successfully") + logger.info("LLM initialized successfully.") self.last_used = time.time() self.llm_loading = False return True except Exception as e: - logger.error(f"Error initializing LLM: {str(e)}") + logger.error(f"!!! ERROR during LLM initialization: {str(e)}") logger.error(traceback.format_exc()) # Log full traceback - # Reset partially loaded components + logger.error("Resetting potentially partially loaded LLM components due to error.") self.tokenizer = None self.model = None self.text_pipeline = None if torch.cuda.is_available(): + logger.info("Clearing CUDA cache after LLM init error.") torch.cuda.empty_cache() gc.collect() self.llm_loading = False @@ -128,8 +140,9 @@ class ModelManager: @spaces.GPU(duration=120) # Increased duration def initialize_whisper(self): """Initialize Whisper model for audio transcription""" + logger.info("Attempting to initialize Whisper.") if self.whisper_loading: - logger.info("Whisper initialization already in progress.") + logger.info("Whisper initialization already in progress. Skipping.") return True if self.whisper_model: logger.info("Whisper already initialized.") @@ -137,27 +150,30 @@ class ModelManager: return True self.whisper_loading = True + logger.info("Starting Whisper initialization...") try: - logger.info("Loading Whisper model...") - # Using tiny model for efficiency but can be changed based on needs + WHISPER_MODEL_NAME = "tiny" # Consider "base" for better accuracy if "tiny" struggles + logger.info(f"Loading Whisper model: {WHISPER_MODEL_NAME}") # Specify weights_only=True to address the FutureWarning # Note: Whisper's load_model might not directly support weights_only yet. # If it errors, remove the weights_only=True. The warning is mainly informative. 
@@ -128,8 +140,9 @@ class ModelManager:
     @spaces.GPU(duration=120)  # Increased duration
     def initialize_whisper(self):
         """Initialize Whisper model for audio transcription"""
+        logger.info("Attempting to initialize Whisper.")
         if self.whisper_loading:
-            logger.info("Whisper initialization already in progress.")
+            logger.info("Whisper initialization already in progress. Skipping.")
             return True
         if self.whisper_model:
             logger.info("Whisper already initialized.")
@@ -137,27 +150,30 @@ class ModelManager:
             return True

         self.whisper_loading = True
+        logger.info("Starting Whisper initialization...")
         try:
-            logger.info("Loading Whisper model...")
-            # Using tiny model for efficiency but can be changed based on needs
+            WHISPER_MODEL_NAME = "tiny"  # Consider "base" for better accuracy if "tiny" struggles
+            logger.info(f"Loading Whisper model: {WHISPER_MODEL_NAME}")

             # Specify weights_only=True to address the FutureWarning
             # Note: Whisper's load_model might not directly support weights_only yet.
             # If it errors, remove the weights_only=True. The warning is mainly informative.
             # Let's attempt without weights_only first as whisper might handle it internally
             self.whisper_model = whisper.load_model(
-                "tiny",  # Consider "base" for better accuracy if "tiny" struggles
+                WHISPER_MODEL_NAME,
                 device="cuda" if torch.cuda.is_available() else "cpu",
                 download_root="/tmp/whisper"  # Use persistent storage if available/needed
             )
-            logger.info("Whisper model initialized successfully")
+            logger.info(f"Whisper model '{WHISPER_MODEL_NAME}' loaded successfully.")
             self.last_used = time.time()
             self.whisper_loading = False
             return True
         except Exception as e:
-            logger.error(f"Error initializing Whisper: {str(e)}")
+            logger.error(f"!!! ERROR during Whisper initialization: {str(e)}")
             logger.error(traceback.format_exc())
+            logger.error("Resetting potentially partially loaded Whisper components due to error.")
             self.whisper_model = None
             if torch.cuda.is_available():
+                logger.info("Clearing CUDA cache after Whisper init error.")
                 torch.cuda.empty_cache()
             gc.collect()
             self.whisper_loading = False
@@ -165,82 +181,111 @@ class ModelManager:
     def check_llm_initialized(self):
         """Check if LLM is initialized and initialize if needed"""
+        logger.info("Checking if LLM is initialized.")
         if self.tokenizer is None or self.model is None or self.text_pipeline is None:
-            logger.info("LLM not initialized, initializing...")
+            logger.info("LLM not initialized, attempting initialization...")
             if not self.llm_loading:  # Prevent re-entry if already loading
-                self.initialize_llm()
+                self.initialize_llm()  # This will raise error if it fails
+                logger.info("LLM initialization completed by check_llm_initialized.")
             else:
-                logger.info("LLM initialization is already in progress by another request.")
+                logger.info("LLM initialization is already in progress by another request. Waiting briefly.")
                 # Optional: Wait a bit for the other process to finish
-                time.sleep(5)
+                time.sleep(10)  # Increased wait time
                 if self.tokenizer is None or self.model is None or self.text_pipeline is None:
+                    logger.error("LLM initialization timed out or failed after waiting.")
                     raise RuntimeError("LLM initialization timed out or failed.")
+                else:
+                    logger.info("LLM seems initialized now after waiting.")
+        else:
+            logger.info("LLM was already initialized.")
         self.last_used = time.time()
Waiting briefly.") + time.sleep(10) # Increased wait time if self.whisper_model is None: + logger.error("Whisper initialization timed out or failed after waiting.") raise RuntimeError("Whisper initialization timed out or failed.") + else: + logger.info("Whisper seems initialized now after waiting.") + else: + logger.info("Whisper was already initialized.") self.last_used = time.time() def reset_models(self, force=False): """Reset models to free memory if they haven't been used recently""" current_time = time.time() - # Only reset if forced or models haven't been used for 10 minutes (600 seconds) - if force or (current_time - self.last_used > 600): + should_reset = force or (current_time - self.last_used > 600) # 10 minutes idle threshold + logger.info(f"Checking if models should be reset. Force: {force}, Idle time: {current_time - self.last_used:.0f}s, Should reset: {should_reset}") + + if should_reset: try: - logger.info("Resetting models to free memory...") + logger.info("--- Resetting models to free memory ---") - # Check and delete attributes safely if hasattr(self, 'model') and self.model is not None: del self.model self.model = None logger.info("LLM model deleted.") + else: logger.info("LLM model was None or not found.") if hasattr(self, 'tokenizer') and self.tokenizer is not None: del self.tokenizer self.tokenizer = None logger.info("LLM tokenizer deleted.") + else: logger.info("LLM tokenizer was None or not found.") if hasattr(self, 'text_pipeline') and self.text_pipeline is not None: del self.text_pipeline self.text_pipeline = None logger.info("LLM pipeline deleted.") + else: logger.info("LLM pipeline was None or not found.") if hasattr(self, 'whisper_model') and self.whisper_model is not None: del self.whisper_model self.whisper_model = None logger.info("Whisper model deleted.") + else: logger.info("Whisper model was None or not found.") # Explicitly clear CUDA cache and collect garbage if torch.cuda.is_available(): + logger.info("Clearing CUDA cache...") torch.cuda.empty_cache() - # torch.cuda.synchronize() # May not be needed and can slow down logger.info("CUDA cache cleared.") + else: + logger.info("CUDA not available, skipping cache clear.") - gc.collect() - logger.info("Garbage collected. Models reset successfully.") + logger.info("Running garbage collection...") + collected_count = gc.collect() + logger.info(f"Garbage collected ({collected_count} objects). Models reset successfully.") self._initialized = False # Mark as uninitialized so they reload on next use except Exception as e: - logger.error(f"Error resetting models: {str(e)}") + logger.error(f"!!! ERROR during model reset: {str(e)}") logger.error(traceback.format_exc()) + else: + logger.info("Skipping model reset (not forced and not idle long enough).") + # Create global model manager instance +logger.info("Creating global ModelManager instance.") model_manager = ModelManager() @lru_cache(maxsize=16) # Reduced cache size slightly def download_social_media_video(url): """Download audio from a social media video URL.""" + logger.info(f"Attempting to download audio from social media URL: {url}") temp_dir = tempfile.mkdtemp() + # Note: Using filename from info_dict can be unreliable. Let yt-dlp decide final name. 

 @lru_cache(maxsize=16)  # Reduced cache size slightly
 def download_social_media_video(url):
     """Download audio from a social media video URL."""
+    logger.info(f"Attempting to download audio from social media URL: {url}")
     temp_dir = tempfile.mkdtemp()
+    # Note: Using filename from info_dict can be unreliable. Let yt-dlp decide final name.
+    output_template = os.path.join(temp_dir, '%(id)s.%(ext)s')
+    final_audio_file_path = None  # Will store the path of the actual downloaded mp3

     ydl_opts = {
         'format': 'bestaudio/best',
@@ -255,72 +300,68 @@ def download_social_media_video(url):
         'nocheckcertificate': True,  # Sometimes needed for tricky sites
         'retries': 3,  # Add retries
         'socket_timeout': 15,  # Timeout
+        'cachedir': False,  # Avoid caching issues in temp envs
     }

     try:
-        logger.info(f"Attempting to download audio from: {url}")
+        logger.info(f"yt-dlp options: {ydl_opts}")  # Log options for debugging
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            logger.info("Extracting info and downloading...")
+            # Download should happen here and postprocessor rename to .mp3
             info_dict = ydl.extract_info(url, download=True)
-            # Construct the expected final filename after postprocessing
-            audio_file = os.path.join(temp_dir, f"{info_dict['id']}.mp3")
-            if not os.path.exists(audio_file):
-                # Fallback if filename doesn't match exactly (e.g., webm -> mp3)
-                found_files = [f for f in os.listdir(temp_dir) if f.endswith('.mp3')]
-                if found_files:
-                    audio_file = os.path.join(temp_dir, found_files[0])
-                else:
-                    raise FileNotFoundError(f"Could not find downloaded MP3 in {temp_dir}")
+            logger.info(f"yt-dlp extraction complete for {url}. ID: {info_dict.get('id')}")
+
+            # Find the downloaded MP3 file (name might not exactly match ID if title had weird chars)
+            found_files = [f for f in os.listdir(temp_dir) if f.endswith('.mp3')]
+            if found_files:
+                final_audio_file_path = os.path.join(temp_dir, found_files[0])
+                logger.info(f"Found downloaded MP3: {final_audio_file_path}")
+            else:
+                logger.error(f"Could not find downloaded MP3 file in {temp_dir} for URL {url}")
+                raise FileNotFoundError(f"Downloaded MP3 not found in {temp_dir}")

-        logger.info(f"Audio downloaded successfully: {audio_file}")
         # Read the file content to return, as the temp dir might be cleaned up
-        with open(audio_file, 'rb') as f:
+        logger.info(f"Reading content of {final_audio_file_path}")
+        with open(final_audio_file_path, 'rb') as f:
            audio_content = f.read()

-        # Clean up the temporary directory and file
-        try:
-            os.remove(audio_file)
-            os.rmdir(temp_dir)
-        except OSError as e:
-            logger.warning(f"Could not completely clean up temp download files: {e}")
-
-        # Save the content to a new temporary file that Gradio can handle
+        # Save the content to a new temporary file that Gradio can handle better
+        logger.info("Saving audio content to a new temporary file...")
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_output_file:
             temp_output_file.write(audio_content)
-            final_path = temp_output_file.name
-        logger.info(f"Audio saved to temporary file: {final_path}")
-        return final_path
+            final_path_for_gradio = temp_output_file.name
+        logger.info(f"Audio content saved to temporary file for processing: {final_path_for_gradio}")
+        return final_path_for_gradio
     except yt_dlp.utils.DownloadError as e:
-        logger.error(f"yt-dlp download error for {url}: {str(e)}")
-        # Clean up temp dir on error
-        try:
-            if os.path.exists(temp_dir):
-                import shutil
-                shutil.rmtree(temp_dir)
-        except Exception as cleanup_e:
-            logger.warning(f"Error during cleanup after download failure: {cleanup_e}")
+        logger.error(f"!!! yt-dlp download error for {url}: {str(e)}")
+        # Don't log full traceback here as DownloadError is often informative enough
         return None  # Return None to indicate failure
     except Exception as e:
-        logger.error(f"Unexpected error downloading video from {url}: {str(e)}")
+        logger.error(f"!!! Unexpected error downloading video from {url}: {str(e)}")
         logger.error(traceback.format_exc())
-        # Clean up temp dir on error
-        try:
-            if os.path.exists(temp_dir):
+        return None  # Return None
+    finally:
+        # Clean up the temporary directory and its contents
+        if os.path.exists(temp_dir):
+            logger.info(f"Cleaning up temporary download directory: {temp_dir}")
+            try:
                 import shutil
                 shutil.rmtree(temp_dir)
-        except Exception as cleanup_e:
-            logger.warning(f"Error during cleanup after download failure: {cleanup_e}")
-        return None  # Return None
+                logger.info("Temporary download directory cleaned up.")
+            except Exception as cleanup_e:
+                logger.warning(f"Could not completely clean up temp download directory {temp_dir}: {cleanup_e}")
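[Review note] The hunk elides most of `ydl_opts`, and the code scans the temp dir for an `.mp3`. For that scan to find anything, the elided options presumably include an audio-extraction postprocessor roughly like this (a hypothetical reconstruction, not a quote of the actual config):

```python
# Hypothetical reconstruction of the elided yt-dlp options (assumption only):
# an FFmpegExtractAudio postprocessor must produce the .mp3 the scan looks for.
ydl_opts_sketch = {
    'format': 'bestaudio/best',
    'outtmpl': output_template,        # '%(id)s.%(ext)s' inside temp_dir
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',   # Re-encode the download...
        'preferredcodec': 'mp3',       # ...to MP3
        'preferredquality': '192',
    }],
    'quiet': True,
}
```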

 def convert_video_to_audio(video_file_path):
     """Convert a video file to audio using ffmpeg directly."""
+    logger.info(f"Attempting to convert video to audio: {video_file_path}")
+    output_file_path = None  # Initialize
     try:
         # Create a temporary file path for the output MP3
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
             output_file_path = temp_file.name
+        logger.info(f"Output audio path will be: {output_file_path}")

-        logger.info(f"Converting video '{video_file_path}' to audio '{output_file_path}'")
-
-        # Use ffmpeg directly via subprocess
         command = [
             "ffmpeg",
             "-i", video_file_path,
@@ -331,77 +372,107 @@ def convert_video_to_audio(video_file_path):
             "-ac", "2",  # Stereo audio
             output_file_path,
             "-y",  # Overwrite output file if it exists
-            "-loglevel", "error"  # Suppress verbose ffmpeg output
+            "-loglevel", "error"  # Suppress verbose ffmpeg output, show only errors
         ]
+        logger.info(f"Executing ffmpeg command: {' '.join(command)}")

-        process = subprocess.run(command, check=True, capture_output=True, text=True)
+        process = subprocess.run(command, check=True, capture_output=True, text=True, timeout=120)  # Added timeout
         logger.info(f"ffmpeg conversion successful for {video_file_path}.")
-        logger.debug(f"ffmpeg stdout: {process.stdout}")
-        logger.debug(f"ffmpeg stderr: {process.stderr}")
+        # Log stdout/stderr only if needed for debugging, can be verbose
+        # logger.debug(f"ffmpeg stdout: {process.stdout}")
+        # logger.debug(f"ffmpeg stderr: {process.stderr}")

         # Verify output file exists and has size
         if not os.path.exists(output_file_path) or os.path.getsize(output_file_path) == 0:
+            logger.error(f"ffmpeg conversion failed: Output file '{output_file_path}' not created or is empty.")
             raise RuntimeError(f"ffmpeg conversion failed: Output file '{output_file_path}' not created or is empty.")

-        logger.info(f"Video converted to audio: {output_file_path}")
+        logger.info(f"Video successfully converted to audio: {output_file_path}")
         return output_file_path

     except subprocess.CalledProcessError as e:
-        logger.error(f"ffmpeg command failed with exit code {e.returncode}")
+        logger.error(f"!!! ffmpeg command failed with exit code {e.returncode} for video: {video_file_path}")
         logger.error(f"ffmpeg stderr: {e.stderr}")
-        logger.error(f"ffmpeg stdout: {e.stdout}")
-        # Clean up potentially empty output file
-        if os.path.exists(output_file_path):
+        # Clean up potentially empty/invalid output file
+        if output_file_path and os.path.exists(output_file_path):
+            logger.info(f"Cleaning up failed ffmpeg output file: {output_file_path}")
             os.remove(output_file_path)
         raise RuntimeError(f"ffmpeg conversion failed: {e.stderr}") from e
+    except subprocess.TimeoutExpired as e:
+        logger.error(f"!!! ffmpeg command timed out after {e.timeout} seconds for video: {video_file_path}")
+        if output_file_path and os.path.exists(output_file_path):
+            logger.info(f"Cleaning up potentially incomplete ffmpeg output file: {output_file_path}")
+            os.remove(output_file_path)
+        raise RuntimeError(f"ffmpeg conversion timed out after {e.timeout} seconds.") from e
     except Exception as e:
-        logger.error(f"Error converting video '{video_file_path}': {str(e)}")
+        logger.error(f"!!! Error converting video '{video_file_path}': {str(e)}")
         logger.error(traceback.format_exc())
         # Clean up potentially created output file
-        if 'output_file_path' in locals() and os.path.exists(output_file_path):
-            os.remove(output_file_path)
+        if output_file_path and os.path.exists(output_file_path):
+            logger.info(f"Cleaning up ffmpeg output file due to exception: {output_file_path}")
+            os.remove(output_file_path)
         raise  # Re-raise the exception
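[Review note] The existence/size check catches empty output but not truncated or corrupt audio. If stronger verification is ever needed, `ffprobe` (shipped with ffmpeg) can report the decoded duration; a sketch, assuming `ffprobe` is on PATH:

```python
# Illustrative sketch: verify converted audio with ffprobe (assumes the
# ffprobe binary is installed alongside ffmpeg).
import json
import subprocess

def probe_duration_seconds(path):
    result = subprocess.run(
        ["ffprobe", "-v", "error", "-show_format", "-of", "json", path],
        check=True, capture_output=True, text=True, timeout=30,
    )
    return float(json.loads(result.stdout)["format"]["duration"])
```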

 def preprocess_audio(input_audio_path):
     """Preprocess the audio file (e.g., normalize volume)."""
+    logger.info(f"Attempting to preprocess audio file: {input_audio_path}")
+    output_path = None
     try:
-        logger.info(f"Preprocessing audio file: {input_audio_path}")
+        # Check if file exists before trying to load
+        if not os.path.exists(input_audio_path):
+            logger.error(f"Input audio file for preprocessing not found: {input_audio_path}")
+            raise FileNotFoundError(f"Input audio file not found: {input_audio_path}")
+
+        logger.info("Loading audio with pydub...")
         audio = AudioSegment.from_file(input_audio_path)
+        logger.info("Audio loaded.")

-        # Apply normalization (optional, adjust target dBFS as needed)
-        # Target loudness: -20 dBFS. Adjust gain based on current loudness.
+        # Example: Normalize volume (optional, uncomment if needed)
+        # logger.info(f"Original dBFS: {audio.dBFS}. Normalizing target: -20 dBFS.")
         # change_in_dBFS = -20.0 - audio.dBFS
         # audio = audio.apply_gain(change_in_dBFS)
+        # logger.info("Volume normalization applied.")

         # Export to a new temporary file
+        logger.info("Exporting preprocessed audio...")
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
             output_path = temp_file.name
             audio.export(output_path, format="mp3")

-        logger.info(f"Audio preprocessed and saved to: {output_path}")
         return output_path
+    except FileNotFoundError as e:
+        logger.error(f"!!! File not found during audio preprocessing: {e}")
+        raise  # Reraise specific error
     except Exception as e:
-        logger.error(f"Error preprocessing audio '{input_audio_path}': {str(e)}")
+        logger.error(f"!!! Error preprocessing audio '{input_audio_path}': {str(e)}")
         logger.error(traceback.format_exc())
-        # Return original path if preprocessing fails? Or raise error?
-        # Let's raise the error to signal failure clearly.
-        raise
+        # Clean up potentially created output file if error occurred during export
+        if output_path and os.path.exists(output_path):
+            logger.info(f"Cleaning up preprocessing output file due to exception: {output_path}")
+            os.remove(output_path)
+        raise  # Re-raise the exception
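[Review note] The gain-based normalization stays commented out here. pydub also ships a ready-made effect that would be a drop-in alternative if normalization is re-enabled (a sketch based on pydub's documented `effects.normalize`; file names are made up):

```python
# Illustrative alternative using pydub's built-in peak normalization
# (not part of the patch).
from pydub import AudioSegment
from pydub.effects import normalize

audio = AudioSegment.from_file("input.mp3")
audio = normalize(audio, headroom=0.1)  # Peak ends up 0.1 dB below full scale
audio.export("output.mp3", format="mp3")
```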

 @spaces.GPU(duration=300)  # Allow more time for transcription
 def transcribe_audio_or_video(file_input):
     """Transcribe an audio or video file (local path or Gradio File object)."""
+    logger.info(f"--- Starting transcription process for input: {type(file_input)} ---")
     audio_file_to_transcribe = None
     original_input_path = None
     temp_files_to_clean = []
+    processing_step = "Initialization"
     try:
-        model_manager.check_whisper_initialized()
+        processing_step = "Whisper Model Check"
+        logger.info("Checking/Initializing Whisper model...")
+        model_manager.check_whisper_initialized()  # Will raise error if fails
+        logger.info("Whisper model is ready.")

         if file_input is None:
-            logger.info("No file input provided for transcription.")
+            logger.info("No file input provided for transcription. Returning empty string.")
             return ""  # Return empty string for None input

         # Determine input type and get file path
+        processing_step = "Input Type Handling"
         if isinstance(file_input, str):  # Input is a path
             original_input_path = file_input
             logger.info(f"Processing path input: {original_input_path}")
@@ -409,449 +480,626 @@ def transcribe_audio_or_video(file_input):
                 logger.error(f"Input file path does not exist: {original_input_path}")
                 raise FileNotFoundError(f"Input file not found: {original_input_path}")
             input_path = original_input_path
-        elif hasattr(file_input, 'name'):  # Input is a Gradio File object
+        elif hasattr(file_input, 'name') and file_input.name:  # Input is a Gradio File object
             original_input_path = file_input.name
-            logger.info(f"Processing Gradio file input: {original_input_path}")
+            logger.info(f"Processing Gradio file input. Temp path: {original_input_path}")
+            if not os.path.exists(original_input_path):
+                logger.error(f"Gradio temporary file path does not exist: {original_input_path}")
+                raise FileNotFoundError(f"Gradio temporary file not found: {original_input_path}")
             input_path = original_input_path  # Gradio usually provides a temp path
         else:
             logger.error(f"Unsupported input type for transcription: {type(file_input)}")
             raise TypeError("Invalid input type for transcription. Expected file path or Gradio File object.")

+        logger.info(f"Input path identified: {input_path}")
         file_extension = os.path.splitext(input_path)[1].lower()
+        logger.info(f"File extension: {file_extension}")

         # Check if it's a video file that needs conversion
+        processing_step = "Video Conversion Check"
         if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.webm']:
-            logger.info(f"Detected video file ({file_extension}), converting to audio...")
-            converted_audio_path = convert_video_to_audio(input_path)
+            logger.info(f"Detected video file ({file_extension}), attempting conversion to audio...")
+            converted_audio_path = convert_video_to_audio(input_path)  # Raises error on failure
+            logger.info(f"Video converted to audio: {converted_audio_path}")
             temp_files_to_clean.append(converted_audio_path)
             audio_file_to_process = converted_audio_path
-        elif file_extension in ['.mp3', '.wav', '.ogg', '.flac', '.m4a']:
+        elif file_extension in ['.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac']:  # Added more audio types
             logger.info(f"Detected audio file ({file_extension}).")
             audio_file_to_process = input_path
         else:
             logger.error(f"Unsupported file extension for transcription: {file_extension}")
             raise ValueError(f"Unsupported file type: {file_extension}")

         # Preprocess the audio (optional)
+        processing_step = "Audio Preprocessing"
         try:
+            logger.info(f"Attempting to preprocess audio file: {audio_file_to_process}")
             preprocessed_audio_path = preprocess_audio(audio_file_to_process)
             # If preprocessing creates a new file different from the input, add it to cleanup
             if preprocessed_audio_path != audio_file_to_process:
+                logger.info("Preprocessing created a new file, adding to cleanup list.")
                 temp_files_to_clean.append(preprocessed_audio_path)
             audio_file_to_transcribe = preprocessed_audio_path
+            logger.info(f"Audio preprocessing successful. File to transcribe: {audio_file_to_transcribe}")
         except Exception as preprocess_err:
-            logger.warning(f"Audio preprocessing failed: {preprocess_err}. Using original/converted audio.")
+            logger.warning(f"Audio preprocessing failed: {preprocess_err}. Using original/converted audio for transcription.")
+            logger.warning(traceback.format_exc())  # Log warning traceback
            audio_file_to_transcribe = audio_file_to_process  # Fallback

-        logger.info(f"Transcribing audio file: {audio_file_to_transcribe}")
+        processing_step = "Transcription"
+        logger.info(f"Starting transcription for: {audio_file_to_transcribe}")
         if not os.path.exists(audio_file_to_transcribe):
+            logger.error(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
             raise FileNotFoundError(f"Audio file to transcribe not found: {audio_file_to_transcribe}")

         # Perform transcription
+        logger.info("Calling Whisper model transcribe method...")
         with torch.inference_mode():  # Ensure inference mode for efficiency
             # Use fp16 if available on CUDA
             use_fp16 = torch.cuda.is_available()
+            logger.info(f"Using fp16 for transcription: {use_fp16}")
             result = model_manager.whisper_model.transcribe(
                 audio_file_to_transcribe,
                 fp16=use_fp16
+                # language="en"  # Optional: specify language if known
             )
-            if not result:
+            logger.info("Whisper model transcribe method finished.")
+            if not result or "text" not in result:
+                logger.error("Transcription failed to produce results or 'text' key missing.")
                 raise RuntimeError("Transcription failed to produce results")

         transcription = result.get("text", "Error: Transcription result empty")
-        # Limit transcription length shown in logs
         log_transcription = (transcription[:100] + '...') if len(transcription) > 100 else transcription
-        logger.info(f"Transcription completed: {log_transcription}")
+        logger.info(f"Transcription completed successfully: '{log_transcription}'")
+        processing_step = "Success"
         return transcription

     except FileNotFoundError as e:
-        logger.error(f"File not found error during transcription: {e}")
+        logger.error(f"!!! File not found error during transcription (Step: {processing_step}): {e}")
+        logger.error(traceback.format_exc())
         return f"Error: Input file not found ({e})"
     except ValueError as e:
-        logger.error(f"Value error during transcription: {e}")
+        logger.error(f"!!! Value error during transcription (Step: {processing_step}): {e}")
+        logger.error(traceback.format_exc())
         return f"Error: Unsupported file type ({e})"
     except TypeError as e:
-        logger.error(f"Type error during transcription setup: {e}")
+        logger.error(f"!!! Type error during transcription setup (Step: {processing_step}): {e}")
+        logger.error(traceback.format_exc())
         return f"Error: Invalid input provided ({e})"
     except RuntimeError as e:
-        logger.error(f"Runtime error during transcription: {e}")
+        logger.error(f"!!! Runtime error during transcription (Step: {processing_step}): {e}")
         logger.error(traceback.format_exc())
         return f"Error during processing: {e}"
     except Exception as e:
-        logger.error(f"Unexpected error during transcription: {str(e)}")
+        logger.error(f"!!! Unexpected error during transcription (Step: {processing_step}): {str(e)}")
         logger.error(traceback.format_exc())
         return f"Error processing the file: An unexpected error occurred."
     finally:
         # Clean up all temporary files created during the process
+        logger.info(f"--- Cleaning up temporary files for transcription process ({len(temp_files_to_clean)} files) ---")
         for temp_file in temp_files_to_clean:
             try:
                 if os.path.exists(temp_file):
                     os.remove(temp_file)
                     logger.info(f"Cleaned up temporary file: {temp_file}")
+                else:
+                    logger.info(f"Temporary file already removed or never created: {temp_file}")
             except Exception as e:
                 logger.warning(f"Could not remove temporary file {temp_file}: {str(e)}")
-        # Optionally reset models if idle (might be too aggressive here)
-        # model_manager.reset_models()
+        logger.info("--- Finished transcription process cleanup ---")
+
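[Review note] Net effect of the error handling above: the function always returns a string, so callers can branch on the `Error` prefix rather than catching exceptions. A usage sketch (the path is made up):

```python
# Usage sketch for transcribe_audio_or_video (illustrative path).
text = transcribe_audio_or_video("/tmp/interview.mp4")
if text.startswith("Error"):
    print(f"Transcription failed: {text}")
else:
    print(f"Transcript ({len(text)} chars): {text[:200]}")
```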

 @lru_cache(maxsize=16)
 def read_document(document_path):
     """Read the content of a document (PDF, DOCX, XLSX, CSV)."""
+    logger.info(f"Attempting to read document: {document_path}")
     try:
-        logger.info(f"Reading document: {document_path}")
         if not os.path.exists(document_path):
+            logger.error(f"Document not found at path: {document_path}")
             raise FileNotFoundError(f"Document not found: {document_path}")

         file_extension = os.path.splitext(document_path)[1].lower()
+        logger.info(f"Document type detected: {file_extension}")
+        content = ""

         if file_extension == ".pdf":
+            logger.info("Reading PDF document using PyMuPDF (fitz)...")
             doc = fitz.open(document_path)
-            text = "\n".join([page.get_text() for page in doc])
+            content = "\n".join([page.get_text() for page in doc])
             doc.close()
-            return text
+            logger.info(f"PDF read successfully. Length: {len(content)} chars.")
         elif file_extension == ".docx":
+            logger.info("Reading DOCX document using python-docx...")
             doc = docx.Document(document_path)
-            return "\n".join([paragraph.text for paragraph in doc.paragraphs])
+            content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
+            logger.info(f"DOCX read successfully. Length: {len(content)} chars.")
         elif file_extension in (".xlsx", ".xls"):
-            # Read all sheets and combine
+            logger.info("Reading Excel document using pandas...")
             xls = pd.ExcelFile(document_path)
-            text = ""
+            text_parts = []
             for sheet_name in xls.sheet_names:
+                logger.info(f"Reading sheet: {sheet_name}")
                 df = pd.read_excel(xls, sheet_name=sheet_name)
-                text += f"--- Sheet: {sheet_name} ---\n{df.to_string()}\n\n"
-            return text.strip()
+                text_parts.append(f"--- Sheet: {sheet_name} ---\n{df.to_string()}")
+            content = "\n\n".join(text_parts).strip()
+            logger.info(f"Excel read successfully. Length: {len(content)} chars.")
         elif file_extension == ".csv":
+            logger.info("Reading CSV document using pandas...")
             # Try detecting separator
             try:
+                logger.info("Attempting CSV read with comma separator...")
                 df = pd.read_csv(document_path)
             except pd.errors.ParserError:
-                logger.warning(f"Could not parse CSV {document_path} with default comma separator, trying semicolon.")
+                logger.warning(f"Could not parse CSV {document_path} with comma separator, trying semicolon.")
                 df = pd.read_csv(document_path, sep=';')
-            return df.to_string()
+            except Exception as csv_err:  # Catch other potential pandas errors
+                logger.error(f"Error reading CSV {document_path}: {csv_err}")
+                raise
+            content = df.to_string()
+            logger.info(f"CSV read successfully. Length: {len(content)} chars.")
         else:
-            logger.warning(f"Unsupported document type: {file_extension}")
+            logger.warning(f"Unsupported document type for reading: {file_extension}")
             return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
+
+        return content
+
     except FileNotFoundError as e:
-        logger.error(f"Error reading document: {e}")
+        logger.error(f"!!! File not found error while reading document: {e}")
+        # logger.error(traceback.format_exc())  # Traceback might be less useful here
         return f"Error: Document file not found at {document_path}"
     except Exception as e:
-        logger.error(f"Error reading document {document_path}: {str(e)}")
+        logger.error(f"!!! Error reading document {document_path}: {str(e)}")
         logger.error(traceback.format_exc())
         return f"Error reading document: {str(e)}"
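[Review note] Separator detection here is try-comma-then-semicolon. If more delimiters ever show up, Python's standard-library `csv.Sniffer` generalizes this; a sketch of that alternative:

```python
# Illustrative alternative: sniff the delimiter before handing off to pandas.
import csv
import pandas as pd

def read_csv_sniffed(path):
    with open(path, newline="") as f:
        dialect = csv.Sniffer().sniff(f.read(4096), delimiters=",;\t")
    return pd.read_csv(path, sep=dialect.delimiter)
```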

 @lru_cache(maxsize=16)
 def read_url(url):
     """Read the main textual content of a URL."""
+    logger.info(f"Attempting to read URL: {url}")
     if not url or not url.strip().startswith('http'):
-        logger.info(f"Invalid or empty URL provided: '{url}'")
+        logger.warning(f"Invalid or empty URL provided: '{url}'")
         return ""  # Return empty for invalid or empty URLs

     try:
-        logger.info(f"Reading URL: {url}")
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
+        logger.info(f"Sending GET request to {url} with headers: {headers}")
         # Increased timeout
         response = requests.get(url, headers=headers, timeout=20, allow_redirects=True)
+        logger.info(f"Received response from {url}. Status code: {response.status_code}")
         response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

         # Check content type - proceed only if likely HTML/text
         content_type = response.headers.get('content-type', '').lower()
+        logger.info(f"URL content type: {content_type}")
         if not ('html' in content_type or 'text' in content_type):
             logger.warning(f"URL {url} has non-text content type: {content_type}. Skipping.")
             return f"Error: URL content type ({content_type}) is not text/html."

+        logger.info(f"Parsing HTML content from {url} using BeautifulSoup...")
         soup = BeautifulSoup(response.content, 'html.parser')
+        logger.info("HTML parsed.")

         # Remove non-content elements like scripts, styles, nav, footers etc.
-        for element in soup(["script", "style", "meta", "noscript", "iframe", "header", "footer", "nav", "aside", "form", "button"]):
-            element.extract()
+        logger.info("Removing script, style, and other non-content tags...")
+        tags_to_remove = ["script", "style", "meta", "noscript", "iframe", "header", "footer", "nav", "aside", "form", "button"]
+        for tag_name in tags_to_remove:
+            for element in soup.find_all(tag_name):
+                element.extract()
+        logger.info("Non-content tags removed.")

         # Attempt to find main content area (common tags/attributes)
+        logger.info("Attempting to find main content container...")
         main_content = (
             soup.find("main") or
             soup.find("article") or
-            soup.find("div", class_=["content", "main", "post-content", "entry-content", "article-body"]) or
-            soup.find("div", id=["content", "main", "article"])
+            soup.find("div", class_=["content", "main", "post-content", "entry-content", "article-body", "story-content"]) or  # Added more common classes
+            soup.find("div", id=["content", "main", "article", "story"])  # Added more common IDs
         )

+        text = ""
         if main_content:
+            logger.info("Main content container found. Extracting text.")
             text = main_content.get_text(separator='\n', strip=True)
         else:
-            # Fallback to body if no specific main content found
+            logger.warning(f"No specific main content container found for {url}. Falling back to body text.")
             body = soup.find("body")
             if body:
+                logger.info("Extracting text from body.")
                 text = body.get_text(separator='\n', strip=True)
             else:  # Very basic fallback
+                logger.warning(f"No body tag found for {url}. Falling back to all text.")
                 text = soup.get_text(separator='\n', strip=True)

         # Clean up whitespace: replace multiple newlines/spaces with single ones
-        text = '\n'.join([line.strip() for line in text.split('\n') if line.strip()])
-        text = ' '.join(text.split())  # Consolidate spaces within lines
+        logger.info("Cleaning extracted text whitespace...")
+        lines = [line.strip() for line in text.split('\n') if line.strip()]
+        cleaned_text = "\n".join(lines)
+        # cleaned_text = ' '.join(cleaned_text.split())  # Consolidate spaces - might merge paragraphs inappropriately, use newline join instead
+        logger.info(f"Text cleaning complete. Initial length: {len(text)}, Cleaned length: {len(cleaned_text)}")

-        if not text:
+        if not cleaned_text:
             logger.warning(f"Could not extract meaningful text from URL: {url}")
             return "Error: Could not extract text content from URL."

         # Limit content size to avoid overwhelming the LLM
         max_chars = 15000
-        if len(text) > max_chars:
-            logger.info(f"URL content truncated to {max_chars} characters.")
-            text = text[:max_chars] + "... [content truncated]"
+        if len(cleaned_text) > max_chars:
+            logger.info(f"URL content is long ({len(cleaned_text)} chars), truncating to {max_chars} characters.")
+            final_text = cleaned_text[:max_chars] + "... [content truncated]"
+        else:
+            final_text = cleaned_text

-        return text
+        logger.info(f"Successfully read and processed URL {url}. Final text length: {len(final_text)}")
+        return final_text

     except requests.exceptions.RequestException as e:
-        logger.error(f"Error fetching URL {url}: {str(e)}")
+        logger.error(f"!!! Error fetching URL {url}: {str(e)}")
+        # logger.error(traceback.format_exc())  # Traceback might not be needed for RequestException
         return f"Error reading URL: Could not fetch content ({e})"
     except Exception as e:
-        logger.error(f"Error parsing URL {url}: {str(e)}")
+        logger.error(f"!!! Error parsing URL {url}: {str(e)}")
         logger.error(traceback.format_exc())
         return f"Error reading URL: Could not parse content ({e})"
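[Review note] Like the other readers, `read_url` reports failures as `Error:`-prefixed strings (or an empty string for invalid input) instead of raising. Quick usage sketch (the URL is illustrative):

```python
# Usage sketch for read_url (example URL is illustrative).
content = read_url("https://example.com/article")
if not content or content.startswith("Error"):
    print(f"No usable content: {content!r}")
else:
    print(f"Extracted {len(content)} characters of article text.")
```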

 def process_social_media_url(url):
     """Process a social media URL, attempting to get text and transcribe video/audio."""
+    logger.info(f"--- Starting processing for social media URL: {url} ---")
     if not url or not url.strip().startswith('http'):
-        logger.info(f"Invalid or empty social media URL: '{url}'")
+        logger.warning(f"Invalid or empty social media URL provided: '{url}'")
         return None

-    logger.info(f"Processing social media URL: {url}")
     text_content = None
     video_transcription = None
     error_occurred = False
+    temp_audio_file = None

-    # 1. Try extracting text content using read_url (might work for some platforms/posts)
+    # 1. Try extracting text content using read_url
+    logger.info(f"Attempting to read text content from social URL: {url}")
     try:
-        text_content = read_url(url)
-        if text_content and text_content.startswith("Error:"):
-            logger.warning(f"Failed to read text content from social URL {url}: {text_content}")
-            text_content = None  # Reset if it was an error message
+        text_content_result = read_url(url)
+        if text_content_result and not text_content_result.startswith("Error:"):
+            text_content = text_content_result
+            logger.info(f"Successfully read text content from {url}. Length: {len(text_content)}")
+        elif text_content_result:
+            logger.warning(f"read_url returned an error for {url}: {text_content_result}")
+            error_occurred = True  # Mark as error but continue
+        else:
+            logger.info(f"No text content extracted by read_url for {url}.")
     except Exception as e:
-        logger.error(f"Error reading text content from social URL {url}: {e}")
+        logger.error(f"!!! Exception during text content extraction from social URL {url}: {e}")
+        logger.error(traceback.format_exc())
         error_occurred = True

     # 2. Try downloading and transcribing potential video/audio content
-    downloaded_audio_path = None
+    logger.info(f"Attempting to download audio/video content from social URL: {url}")
     try:
-        downloaded_audio_path = download_social_media_video(url)
-        if downloaded_audio_path:
-            logger.info(f"Audio downloaded from {url}, proceeding to transcription.")
-            video_transcription = transcribe_audio_or_video(downloaded_audio_path)
-            if video_transcription and video_transcription.startswith("Error"):
-                logger.warning(f"Transcription failed for audio from {url}: {video_transcription}")
-                video_transcription = None  # Reset if it was an error
+        temp_audio_file = download_social_media_video(url)  # Returns path or None
+        if temp_audio_file:
+            logger.info(f"Audio downloaded from {url} to {temp_audio_file}. Proceeding to transcription.")
+            # Transcribe the downloaded audio file
+            transcription_result = transcribe_audio_or_video(temp_audio_file)  # Handles errors internally
+            if transcription_result and not transcription_result.startswith("Error"):
+                video_transcription = transcription_result
+                logger.info(f"Successfully transcribed audio from {url}. Length: {len(video_transcription)}")
+            elif transcription_result:
+                logger.warning(f"Transcription returned an error for audio from {url}: {transcription_result}")
+                error_occurred = True  # Mark as error but maybe text content worked
+            else:
+                logger.warning(f"Transcription returned empty result for audio from {url}.")
         else:
             logger.info(f"No downloadable audio/video found or download failed for URL: {url}")
     except Exception as e:
-        logger.error(f"Error processing video content from social URL {url}: {e}")
+        logger.error(f"!!! Exception during video/audio processing for social URL {url}: {e}")
         logger.error(traceback.format_exc())
         error_occurred = True
     finally:
         # Clean up downloaded file if it exists
-        if downloaded_audio_path and os.path.exists(downloaded_audio_path):
+        if temp_audio_file and os.path.exists(temp_audio_file):
+            logger.info(f"Cleaning up downloaded social media audio file: {temp_audio_file}")
             try:
-                os.remove(downloaded_audio_path)
-                logger.info(f"Cleaned up downloaded audio: {downloaded_audio_path}")
+                os.remove(temp_audio_file)
+                logger.info("Downloaded audio file removed.")
             except Exception as e:
-                logger.warning(f"Failed to cleanup downloaded audio {downloaded_audio_path}: {e}")
-
-    # Return results only if some content was found or no critical error occurred
-    if text_content or video_transcription or not error_occurred:
-        return {
-            "text": text_content or "",  # Ensure string type
-            "video": video_transcription or ""  # Ensure string type
-        }
-    else:
-        logger.error(f"Failed to process social media URL {url} completely.")
-        return None  # Indicate failure
+                logger.warning(f"Failed to cleanup downloaded audio {temp_audio_file}: {e}")
+
+    # Return results
+    logger.info(f"--- Finished processing social media URL: {url} ---")
+    # Return dict even if empty, let caller decide if it's useful
+    return {
+        "text": text_content or "",  # Ensure string type
+        "video": video_transcription or ""  # Ensure string type
+    }
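[Review note] After this change the function returns a dict with guaranteed-string fields for any valid URL, and `None` only for invalid input, which simplifies callers. Sketch (the URL is illustrative):

```python
# Usage sketch for process_social_media_url (URL is made up).
result = process_social_media_url("https://twitter.com/example/status/123")
if result:  # None is returned only for invalid/empty URLs
    print("Post text:", result["text"][:100])
    print("Video transcription:", result["video"][:100])
```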

 @spaces.GPU(duration=300)  # Allow more time for generation
 def generate_news(instructions, facts, size, tone, *args):
     """Generate a news article based on provided data using an LLM."""
     request_start_time = time.time()
-    logger.info("Received request to generate news.")
+    logger.info("--- generate_news function started ---")
+    raw_transcriptions = ""  # Initialize transcription log string
+    generated_article = "Error: Processing failed before generation could start."  # Default error msg
+
     try:
-        # Ensure size is integer
+        # --- Parameter Logging & Basic Validation ---
+        logger.info(f"Received Instructions: {'Yes' if instructions else 'No'}")
+        logger.info(f"Received Facts: {'Yes' if facts else 'No'}")
+        logger.info(f"Requested Size: {size}, Tone: {tone}")
+        logger.info(f"Number of dynamic arguments (*args): {len(args)}")
         try:
             size = int(size) if size else 250  # Default size if None/empty
         except ValueError:
             logger.warning(f"Invalid size value '{size}', defaulting to 250.")
             size = 250
+        logger.info(f"Using Size: {size}")

-        # Check if models are initialized, load if necessary
-        model_manager.check_llm_initialized()  # LLM is essential
-        # Whisper might be needed later, check/load if audio sources exist

         # --- Argument Parsing ---
-        # The order *must* match the order components are added to inputs_list in create_demo
-        # Fixed inputs: instructions, facts, size, tone (already passed directly)
-        # Dynamic inputs from *args:
-        # Expected order in *args based on create_demo:
-        # 5 Documents, 15 Audio-related, 5 URLs, 9 Social-related
+        logger.info("Parsing dynamic arguments...")
         num_docs = 5
         num_audio_sources = 5
         num_audio_inputs_per_source = 3
         num_urls = 5
         num_social_sources = 3
         num_social_inputs_per_source = 3
-
         total_expected_args = num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls + (num_social_sources * num_social_inputs_per_source)

         args_list = list(args)
-        # Pad args_list with None if fewer arguments were received than expected
-        args_list.extend([None] * (total_expected_args - len(args_list)))
+        if len(args_list) < total_expected_args:
+            logger.warning(f"Received fewer arguments ({len(args_list)}) than expected ({total_expected_args}). Padding with None.")
+            args_list.extend([None] * (total_expected_args - len(args_list)))
+        elif len(args_list) > total_expected_args:
+            logger.warning(f"Received more arguments ({len(args_list)}) than expected ({total_expected_args}). Truncating.")
+            args_list = args_list[:total_expected_args]

         # Slice arguments based on the expected order
         doc_files = args_list[0:num_docs]
         audio_inputs_flat = args_list[num_docs : num_docs + (num_audio_sources * num_audio_inputs_per_source)]
         url_inputs = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) : num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls]
         social_inputs_flat = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls : total_expected_args]
+        logger.info(f"Argument parsing complete. Docs: {len(doc_files)}, Audio sets: {len(audio_inputs_flat)//3}, URLs: {len(url_inputs)}, Social sets: {len(social_inputs_flat)//3}")
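[Review note] The flat `*args` layout is easiest to sanity-check with concrete numbers: 5 documents, then 5 x 3 audio fields, then 5 URLs, then 3 x 3 social fields, 34 values total. A worked sketch of the slicing with stand-in values:

```python
# Worked example of the *args slicing (illustrative stand-in values).
args_list = list(range(34))            # 5 + 5*3 + 5 + 3*3 = 34 expected inputs
doc_files = args_list[0:5]             # Indices 0-4: document uploads
audio_inputs_flat = args_list[5:20]    # Indices 5-19: 5 sources x (file, name, position)
url_inputs = args_list[20:25]          # Indices 20-24: plain URLs
social_inputs_flat = args_list[25:34]  # Indices 25-33: 3 sources x (url, name, context)
```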

         knowledge_base = {
             "instructions": instructions or "No specific instructions provided.",
             "facts": facts or "No specific facts provided.",
             "document_content": [],
-            "audio_data": [],
+            "audio_data": [],  # Will store dicts: {file_path, name, position, original_filename}
             "url_content": [],
-            "social_content": []
+            "social_content": []  # Will store dicts from process_social_media_url
         }
-        raw_transcriptions = ""  # Initialize transcription log

-        # --- Process Inputs ---
-        logger.info("Processing document inputs...")
+
+        # --- Process Document Inputs ---
+        logger.info("--- Processing document inputs ---")
+        doc_counter = 0
         for i, doc_file in enumerate(doc_files):
-            if doc_file and hasattr(doc_file, 'name'):
+            if doc_file and hasattr(doc_file, 'name') and doc_file.name:
+                doc_filename = os.path.basename(doc_file.name)
+                logger.info(f"Attempting to read document {i+1}: {doc_filename} (Path: {doc_file.name})")
                 try:
                     content = read_document(doc_file.name)  # doc_file.name is the temp path
-                    if content and not content.startswith("Error"):
-                        # Truncate long documents for the knowledge base summary
+                    if content and content.startswith("Error:"):
+                        logger.warning(f"Skipping document {i+1} ({doc_filename}) due to read error: {content}")
+                        raw_transcriptions += f"[Document {i+1}: {doc_filename}] Error reading: {content}\n\n"
+                    elif content:
                         doc_excerpt = (content[:1000] + "... [document truncated]") if len(content) > 1000 else content
-                        knowledge_base["document_content"].append(f"[Document {i+1} Source: {os.path.basename(doc_file.name)}]\n{doc_excerpt}")
+                        knowledge_base["document_content"].append(f"[Document {i+1} Source: {doc_filename}]\n{doc_excerpt}")
+                        logger.info(f"Successfully processed document {i+1}. Added excerpt to knowledge base.")
+                        doc_counter += 1
+                        # Add full content to raw_transcriptions log? Might be too verbose.
+                        # raw_transcriptions += f"[Document {i+1}: {doc_filename}]\n{content}\n\n"
                     else:
-                        logger.warning(f"Skipping document {i+1} due to read error or empty content: {content}")
+                        logger.warning(f"Skipping document {i+1} ({doc_filename}) because content is empty after reading.")
+                        raw_transcriptions += f"[Document {i+1}: {doc_filename}] Read successfully but content is empty.\n\n"
                 except Exception as e:
-                    logger.error(f"Failed to process document {i+1} ({doc_file.name}): {e}")
-                    # No cleanup needed here, Gradio handles temp file uploads
+                    logger.error(f"!!! FAILED to process document {i+1} ({doc_filename}): {e}")
+                    logger.error(traceback.format_exc())
+                    raw_transcriptions += f"[Document {i+1}: {doc_filename}] CRITICAL Error during processing: {e}\n\n"
+            else:
+                logger.info(f"Skipping document slot {i+1}: No file provided or invalid file object.")
+        logger.info(f"--- Finished processing document inputs. {doc_counter} documents added. ---")
+        # Gradio handles cleanup of the uploaded temp file doc_file.name
---") + # Gradio handles cleanup of the uploaded temp file doc_file.name + - logger.info("Processing URL inputs...") + # --- Process URL Inputs --- + logger.info("--- Processing URL inputs ---") + url_counter = 0 for i, url in enumerate(url_inputs): if url and isinstance(url, str) and url.strip().startswith('http'): + logger.info(f"Attempting to read URL {i+1}: {url}") try: content = read_url(url) - if content and not content.startswith("Error"): - # Content is already truncated in read_url if needed + if content and content.startswith("Error:"): + logger.warning(f"Skipping URL {i+1} ({url}) due to read error: {content}") + raw_transcriptions += f"[URL {i+1}: {url}] Error reading: {content}\n\n" + elif content: + # Content is already truncated in read_url if needed knowledge_base["url_content"].append(f"[URL {i+1} Source: {url}]\n{content}") + logger.info(f"Successfully processed URL {i+1}. Added content to knowledge base.") + url_counter += 1 else: - logger.warning(f"Skipping URL {i+1} ({url}) due to read error or empty content: {content}") + logger.warning(f"Skipping URL {i+1} ({url}) because content is empty after reading.") + raw_transcriptions += f"[URL {i+1}: {url}] Read successfully but content is empty.\n\n" except Exception as e: - logger.error(f"Failed to process URL {i+1} ({url}): {e}") + logger.error(f"!!! FAILED to process URL {i+1} ({url}): {e}") + logger.error(traceback.format_exc()) + raw_transcriptions += f"[URL {i+1}: {url}] CRITICAL Error during processing: {e}\n\n" + elif url and isinstance(url, str) and url.strip(): + logger.warning(f"Skipping URL slot {i+1}: Input '{url}' is not a valid HTTP/HTTPS URL.") + else: + logger.info(f"Skipping URL slot {i+1}: No URL provided.") + logger.info(f"--- Finished processing URL inputs. {url_counter} URLs added. 
---") + - logger.info("Processing audio/video inputs...") + # --- Process Audio/Video Inputs --- + logger.info("--- Processing audio/video inputs (collecting info) ---") has_audio_source = False + audio_counter = 0 for i in range(num_audio_sources): start_idx = i * num_audio_inputs_per_source - audio_file = audio_inputs_flat[start_idx] - name = audio_inputs_flat[start_idx + 1] or f"Source {i+1}" - position = audio_inputs_flat[start_idx + 2] or "N/A" - - if audio_file and hasattr(audio_file, 'name'): - # Store info for transcription later - knowledge_base["audio_data"].append({ - "file_path": audio_file.name, # Use the temp path - "name": name, - "position": position, - "original_filename": os.path.basename(audio_file.name) # Keep original for logs - }) - has_audio_source = True - logger.info(f"Added audio source {i+1}: {name} ({position}) - File: {knowledge_base['audio_data'][-1]['original_filename']}") - - logger.info("Processing social media inputs...") + # Check if indices are valid before accessing + if start_idx + 2 < len(audio_inputs_flat): + audio_file = audio_inputs_flat[start_idx] + name = audio_inputs_flat[start_idx + 1] or f"Unnamed Audio Source {i+1}" + position = audio_inputs_flat[start_idx + 2] or "Role N/A" + + if audio_file and hasattr(audio_file, 'name') and audio_file.name: + audio_filename = os.path.basename(audio_file.name) + logger.info(f"Found audio/video source {i+1}: {name} ({position}) - File: {audio_filename} (Path: {audio_file.name})") + # Store info for transcription later + knowledge_base["audio_data"].append({ + "file_path": audio_file.name, # Use the temp path + "name": name, + "position": position, + "original_filename": audio_filename + }) + has_audio_source = True + audio_counter += 1 + else: + logger.info(f"Skipping audio source slot {i+1}: No file provided or invalid file object.") + else: + logger.warning(f"Index out of bounds when processing audio source {i+1}. Check argument parsing logic.") + break # Stop processing further audio if indexing is wrong + logger.info(f"--- Finished collecting audio/video input info. {audio_counter} sources found. 
+
+        # --- Process Social Media Inputs ---
+        logger.info("--- Processing social media inputs ---")
         has_social_source = False
+        social_counter = 0
         for i in range(num_social_sources):
             start_idx = i * num_social_inputs_per_source
-            social_url = social_inputs_flat[start_idx]
-            social_name = social_inputs_flat[start_idx + 1] or f"Social Source {i+1}"
-            social_context = social_inputs_flat[start_idx + 2] or "N/A"
-
-            if social_url and isinstance(social_url, str) and social_url.strip().startswith('http'):
-                try:
-                    logger.info(f"Processing social media URL {i+1}: {social_url}")
-                    social_data = process_social_media_url(social_url)
-                    if social_data:
-                        knowledge_base["social_content"].append({
-                            "url": social_url,
-                            "name": social_name,
-                            "context": social_context,
-                            "text": social_data.get("text", ""),
-                            "video_transcription": social_data.get("video", "")  # Store potential transcription
-                        })
-                        has_social_source = True
-                        logger.info(f"Added social source {i+1}: {social_name} ({social_context}) from {social_url}")
-                    else:
-                        logger.warning(f"Could not retrieve any content for social URL {i+1}: {social_url}")
-                except Exception as e:
-                    logger.error(f"Failed to process social URL {i+1} ({social_url}): {e}")
-
-
-        # --- Transcribe Audio/Video ---
-        # Only initialize Whisper if needed
+            if start_idx + 2 < len(social_inputs_flat):
+                social_url = social_inputs_flat[start_idx]
+                social_name = social_inputs_flat[start_idx + 1] or f"Unnamed Social Source {i+1}"
+                social_context = social_inputs_flat[start_idx + 2] or "Context N/A"
+
+                if social_url and isinstance(social_url, str) and social_url.strip().startswith('http'):
+                    logger.info(f"Attempting to process social media URL {i+1}: {social_url} ({social_name}, {social_context})")
+                    try:
+                        social_data = process_social_media_url(social_url)  # Returns dict or None
+                        if social_data and (social_data.get("text") or social_data.get("video")):
+                            logger.info(f"Successfully processed social URL {i+1}. Text found: {bool(social_data.get('text'))}, Video transcription found: {bool(social_data.get('video'))}")
+                            knowledge_base["social_content"].append({
+                                "url": social_url,
+                                "name": social_name,
+                                "context": social_context,
+                                "text": social_data.get("text", ""),
+                                "video_transcription": social_data.get("video", "")  # Store potential transcription
+                            })
+                            has_social_source = True  # Mark even if only text is found
+                            social_counter += 1
+                        elif social_data:
+                            logger.warning(f"Processed social URL {i+1} ({social_url}) but found no text or video content.")
+                            raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] Processed but no content found.\n\n"
+                        else:
+                            # process_social_media_url returning None implies an error occurred during processing
+                            logger.error(f"Processing failed for social URL {i+1} ({social_url}). See previous logs.")
+                            raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] Error during processing.\n\n"
+                    except Exception as e:
+                        logger.error(f"!!! FAILED to process social URL {i+1} ({social_url}): {e}")
+                        logger.error(traceback.format_exc())
+                        raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] CRITICAL Error during processing: {e}\n\n"
+                elif social_url and isinstance(social_url, str) and social_url.strip():
+                    logger.warning(f"Skipping social media slot {i+1}: Input '{social_url}' is not a valid HTTP/HTTPS URL.")
+                else:
+                    logger.info(f"Skipping social media slot {i+1}: No URL provided.")
+            else:
+                logger.warning(f"Index out of bounds when processing social source {i+1}. Check argument parsing logic.")
+                break
+        logger.info(f"--- Finished processing social media inputs. {social_counter} sources added. ---")
+
+        # --- Transcribe Audio/Video (Conditional) ---
         transcriptions_for_prompt = ""
-        if has_audio_source or any(sc.get("video_transcription") == "[NEEDS_TRANSCRIPTION]" for sc in knowledge_base["social_content"]):  # Check if transcription actually needed
-            logger.info("Audio sources detected, ensuring Whisper model is ready...")
+        if has_audio_source:
+            logger.info("--- Starting Audio Transcription Phase ---")
             try:
+                # Ensure Whisper is ready (check_whisper_initialized raises error if fails)
+                logger.info("Ensuring Whisper model is initialized for transcription...")
                 model_manager.check_whisper_initialized()
+                logger.info("Whisper model confirmed ready.")
+
+                for idx, data in enumerate(knowledge_base["audio_data"]):
+                    audio_filename = data['original_filename']
+                    logger.info(f"Attempting transcription for audio source {idx+1}: {audio_filename} ({data['name']}, {data['position']})")
+                    try:
+                        # Call the robust transcription function
+                        transcription = transcribe_audio_or_video(data["file_path"])
+                        if transcription and not transcription.startswith("Error"):
+                            logger.info(f"Transcription successful for audio {idx+1}. Length: {len(transcription)}")
+                            quote = f'"{transcription}" - {data["name"]}, {data["position"]}'
+                            transcriptions_for_prompt += f"{quote}\n\n"
+                            raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n"{transcription}"\n\n'
+                        elif transcription:
+                            logger.warning(f"Transcription failed or returned error for audio source {idx+1} ({audio_filename}): {transcription}")
+                            raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[Error during transcription: {transcription}]\n\n'
+                        else:
+                            logger.warning(f"Transcription returned empty result for audio source {idx+1} ({audio_filename}).")
+                            raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[Transcription result was empty.]\n\n'
+                    except Exception as e:
+                        logger.error(f"!!! CRITICAL Error during transcription call for audio source {idx+1} ({audio_filename}): {e}")
+                        logger.error(traceback.format_exc())
+                        raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[CRITICAL Error during transcription: {e}]\n\n'
+                # Gradio handles cleanup of the uploaded temp file audio_file.name based on the path stored
 
-    logger.info("Adding social media content to prompt data...")
+
+    # --- Add Social Media Content to Prompt Data ---
+    logger.info("--- Adding social media content to prompt data ---")
+    social_content_added_to_prompt = False
     for idx, data in enumerate(knowledge_base["social_content"]):
-        source_id = f'[Social Media {idx+1}: {data["url"]} ({data["name"]}, {data["context"]})]'
-        has_content = False
-        if data["text"] and not data["text"].startswith("Error"):
-            # Truncate long text for the prompt, but keep full in knowledge base maybe?
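+            # The 500-character excerpt built below bounds the prompt size; the full
+            # text is still written to raw_transcriptions, so nothing is lost there.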
+ source_id_log = f'[Social Media {idx+1}: {data["url"]} ({data["name"]}, {data["context"]})]' + source_id_prompt = f'Social Media Post ({data["name"]}, {data["context"]} at {data["url"]}):' + content_added_this_source = False + + # Add text content if available + if data["text"]: text_excerpt = (data["text"][:500] + "...[text truncated]") if len(data["text"]) > 500 else data["text"] - social_text_prompt = f'{source_id} - Text Content:\n"{text_excerpt}"\n\n' - transcriptions_for_prompt += social_text_prompt # Add text content as if it were a quote/source - raw_transcriptions += f"{source_id}\nText Content:\n{data['text']}\n\n" # Log full text - has_content = True - if data["video_transcription"] and not data["video_transcription"].startswith("Error"): - social_video_prompt = f'{source_id} - Video Transcription:\n"{data["video_transcription"]}"\n\n' + social_text_prompt = f'{source_id_prompt}\nText Content:\n"{text_excerpt}"\n\n' + transcriptions_for_prompt += social_text_prompt + raw_transcriptions += f"{source_id_log}\nText Content:\n{data['text']}\n\n" # Log full text + logger.info(f"Added text excerpt from social source {idx+1} to prompt data.") + content_added_this_source = True + social_content_added_to_prompt = True + + # Add video transcription if available + if data["video_transcription"]: + social_video_prompt = f'{source_id_prompt}\nVideo Transcription:\n"{data["video_transcription"]}"\n\n' transcriptions_for_prompt += social_video_prompt - raw_transcriptions += f"{source_id}\nVideo Transcription:\n{data['video_transcription']}\n\n" - has_content = True - if not has_content: - raw_transcriptions += f"{source_id}\n[No usable text or video transcription found]\n\n" + raw_transcriptions += f"{source_id_log}\nVideo Transcription:\n{data['video_transcription']}\n\n" + logger.info(f"Added video transcription from social source {idx+1} to prompt data.") + content_added_this_source = True + social_content_added_to_prompt = True + + if not content_added_this_source: + logger.info(f"No usable text or video transcription found for social source {idx+1} ({data['url']}).") + # No need to add error to raw_transcriptions here, lack of content is logged earlier + + if not social_content_added_to_prompt: + logger.info("No content from social media sources was added to the prompt data.") + logger.info("--- Finished adding social media content to prompt data ---") # --- Prepare Final Prompt --- - # Combine document and URL summaries - document_summary = "\n\n".join(knowledge_base["document_content"]) if knowledge_base["document_content"] else "No document content provided." - url_summary = "\n\n".join(knowledge_base["url_content"]) if knowledge_base["url_content"] else "No URL content provided." - transcription_summary = transcriptions_for_prompt if transcriptions_for_prompt else "No usable transcriptions available." + logger.info("--- Preparing final prompt for LLM ---") + document_summary = "\n\n".join(knowledge_base["document_content"]) if knowledge_base["document_content"] else "No document content provided or processed successfully." + url_summary = "\n\n".join(knowledge_base["url_content"]) if knowledge_base["url_content"] else "No URL content provided or processed successfully." + transcription_summary = transcriptions_for_prompt if transcriptions_for_prompt else "No usable transcriptions or social media content available." # Construct the prompt for the LLM prompt = f"""[INST] You are a professional news writer. 
Your task is to synthesize information from various sources into a coherent news article.
@@ -885,102 +1133,130 @@ Begin the article now. [/INST]
 
 Article Draft:
 """
 
-    # Log the prompt length (useful for debugging context limits)
-    logger.info(f"Generated prompt length: {len(prompt.split())} words / {len(prompt)} characters.")
-    # Avoid logging the full prompt if it's too long or contains sensitive info
-    # logger.debug(f"Generated Prompt:\n{prompt}")
+    # Log prompt length details
+    prompt_words = len(prompt.split())
+    prompt_chars = len(prompt)
+    logger.info(f"Generated prompt length: {prompt_words} words / {prompt_chars} characters.")
+    # Log first/last few chars for verification, avoid logging the full, potentially huge prompt
+    logger.debug(f"Prompt Start: {prompt[:200]}...")
+    logger.debug(f"...Prompt End: {prompt[-200:]}")
+    logger.info("--- Finished preparing final prompt ---")
+
 
     # --- Generate News Article ---
-    logger.info("Generating news article with LLM...")
+    logger.info("--- Starting LLM Generation Phase ---")
     generation_start_time = time.time()
 
-    # Estimate max_new_tokens based on requested size + buffer
-    # Add buffer for title, hook, and potential verbosity
+    # Ensure LLM is ready
+    logger.info("Ensuring LLM is initialized for generation...")
+    try:
+        model_manager.check_llm_initialized() # Raises error if it fails
+        logger.info("LLM confirmed ready.")
+    except Exception as llm_init_err:
+        logger.error(f"!!! FATAL: LLM could not be initialized. Cannot generate article.")
+        logger.error(traceback.format_exc())
+        raise RuntimeError(f"LLM failed to initialize, cannot generate article: {llm_init_err}") from llm_init_err
+
+
+    # Estimate max_new_tokens
     estimated_tokens_per_word = 1.5
-    max_new_tokens = int(size * estimated_tokens_per_word + 150) # size words + buffer
-    # Ensure max_new_tokens doesn't exceed model limits (adjust based on model's max context)
-    model_max_length = 2048 # Typical for TinyLlama, but check specific model card
-    # Calculate available space for generation
-    # Note: This token count is approximate. Precise tokenization is needed for accuracy.
-    # prompt_tokens = len(model_manager.tokenizer.encode(prompt)) # More accurate but slower
-    prompt_tokens_estimate = len(prompt) // 3 # Rough estimate
-    max_new_tokens = min(max_new_tokens, model_max_length - prompt_tokens_estimate - 50) # Leave buffer
-    max_new_tokens = max(max_new_tokens, 100) # Ensure at least a minimum generation length
-
-    logger.info(f"Requesting max_new_tokens: {max_new_tokens}")
+    max_new_tokens = int(size * estimated_tokens_per_word + 150) # size words + buffer for title/hook/etc.
+    model_max_length = 2048 # Check model card if different
+    # Simple length check for prompt tokens (more accurate requires the tokenizer)
+    prompt_tokens_estimate = prompt_chars // 3 # Very rough estimate
+    available_tokens = model_max_length - prompt_tokens_estimate - 50 # Leave buffer
+    max_new_tokens = min(max_new_tokens, available_tokens)
+    max_new_tokens = max(max_new_tokens, 100) # Ensure at least a minimum generation length
+
+    logger.info(f"Estimated prompt tokens: ~{prompt_tokens_estimate}. Model max length: {model_max_length}. Requesting max_new_tokens: {max_new_tokens}")
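+    # A more precise budget check is possible with the already-loaded tokenizer
+    # (a sketch, skipped here to avoid an extra encode pass on every request):
+    #   prompt_tokens = len(model_manager.tokenizer.encode(prompt))
+    #   max_new_tokens = max(min(max_new_tokens, model_max_length - prompt_tokens - 50), 100)
+    # Note the max(..., 100) floor can still overflow the context window for very
+    # long prompts; truncating source material is the real fix in that case.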
 
     try:
-        # Generate using the pipeline
+        logger.info("Calling LLM text generation pipeline...")
         outputs = model_manager.text_pipeline(
             prompt,
-            max_new_tokens=max_new_tokens, # Use max_new_tokens instead of max_length
+            max_new_tokens=max_new_tokens,
             do_sample=True,
-            temperature=0.7, # Standard temperature for creative but factual
+            temperature=0.7,
             top_p=0.95,
-            top_k=50, # Consider adding top_k
-            repetition_penalty=1.15, # Adjusted penalty
+            top_k=50,
+            repetition_penalty=1.15,
             pad_token_id=model_manager.tokenizer.eos_token_id,
             num_return_sequences=1
         )
+        logger.info("LLM pipeline call finished.")
+
+        if not outputs or not isinstance(outputs, list) or not outputs[0].get('generated_text'):
+            logger.error("LLM pipeline returned invalid or empty output.")
+            raise RuntimeError("LLM generation failed: Pipeline returned empty or invalid output.")
 
         # Extract generated text
-        generated_text = outputs[0]['generated_text']
+        full_generated_text = outputs[0]['generated_text']
+        logger.info(f"Raw generated text length: {len(full_generated_text)} chars.")
+        # logger.debug(f"Raw LLM Output:\n{full_generated_text}") # Be careful about logging the full output
 
         # Clean up the result by removing the prompt
-        # Find the end of the prompt marker [/INST] and take text after it
+        logger.info("Cleaning LLM output (removing prompt)...")
         inst_marker = "[/INST]"
-        marker_pos = generated_text.find(inst_marker)
+        marker_pos = full_generated_text.find(inst_marker)
         if marker_pos != -1:
-            news_article = generated_text[marker_pos + len(inst_marker):].strip()
+            generated_article = full_generated_text[marker_pos + len(inst_marker):].strip()
             # Further clean potentially leading "Article Draft:" if model included it
-            if news_article.startswith("Article Draft:"):
-                news_article = news_article[len("Article Draft:"):].strip()
+            if generated_article.startswith("Article Draft:"):
+                generated_article = generated_article[len("Article Draft:"):].strip()
+            logger.info("Prompt removed successfully using '[/INST]' marker.")
         else:
-            # Fallback: Try removing the input prompt string itself (less reliable)
-            if prompt in generated_text:
-                news_article = generated_text.replace(prompt, "", 1).strip()
-            else:
-                # If prompt not found exactly, assume the output is only the generation
-                # This might happen if the pipeline handles prompt removal internally sometimes
-                news_article = generated_text
-                logger.warning("Prompt marker '[/INST]' not found in LLM output. Returning full output.")
-
+            # Removing the prompt by string matching is unreliable for long prompts
+            # and risks deleting generated content, so the full output is kept.
+            logger.warning("Prompt marker '[/INST]' not found in LLM output. Returning full generated text.")
+            generated_article = full_generated_text
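+        # (Sketch) This manual cleanup could be avoided: the transformers
+        # text-generation pipeline accepts return_full_text=False, which makes it
+        # return only the newly generated text instead of prompt + completion.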
 
         generation_time = time.time() - generation_start_time
-        logger.info(f"News generation completed in {generation_time:.2f} seconds. Output length: {len(news_article)} characters.")
+        logger.info(f"News generation completed in {generation_time:.2f} seconds.")
+        logger.info(f"Final article length: {len(generated_article)} characters.")
+        logger.info("--- Finished LLM Generation Phase ---")
 
     except torch.cuda.OutOfMemoryError as oom_error:
-        logger.error(f"CUDA Out of Memory error during LLM generation: {oom_error}")
+        logger.error(f"!!! CUDA Out of Memory error during LLM generation: {oom_error}")
         logger.error(traceback.format_exc())
+        logger.info("Attempting to reset models after OOM error...")
        model_manager.reset_models(force=True) # Attempt to recover
        raise RuntimeError("Generation failed due to insufficient GPU memory. Please try reducing article size or complexity.") from oom_error
 
    except Exception as gen_error:
-        logger.error(f"Error during text generation pipeline: {str(gen_error)}")
+        logger.error(f"!!! Error during text generation pipeline: {str(gen_error)}")
        logger.error(traceback.format_exc())
        raise RuntimeError(f"LLM generation failed: {gen_error}") from gen_error
 
    total_time = time.time() - request_start_time
-    logger.info(f"Total request processing time: {total_time:.2f} seconds.")
+    logger.info(f"--- generate_news function completed successfully in {total_time:.2f} seconds. ---")
 
    # Return the generated article and the log of raw transcriptions
-    return news_article, raw_transcriptions.strip()
+    return generated_article.strip(), raw_transcriptions.strip()
 
 except Exception as e:
+    # Catch-all for any unexpected error during the entire generate_news flow
    total_time = time.time() - request_start_time
-    logger.error(f"Error in generate_news function after {total_time:.2f} seconds: {str(e)}")
+    logger.error(f"!!! UNHANDLED Error in generate_news function after {total_time:.2f} seconds: {str(e)}")
    logger.error(traceback.format_exc())
    # Attempt to reset models to recover state if possible
    try:
+        logger.info("Attempting model reset due to unhandled error in generate_news.")
        model_manager.reset_models(force=True)
    except Exception as reset_error:
        logger.error(f"Failed to reset models after error: {str(reset_error)}")
 
    # Return error messages to the UI
-    error_message = f"Error generating the news article: {str(e)}"
-    transcription_log = raw_transcriptions.strip() + f"\n\n[ERROR] News generation failed: {str(e)}"
+    error_message = f"Error generating the news article: An unexpected error occurred. Please check logs. ({str(e)})"
+    transcription_log = raw_transcriptions.strip() + f"\n\n[CRITICAL ERROR] News generation failed unexpectedly: {str(e)}"
    return error_message, transcription_log
 
+finally:
+    # Optional: Log resource usage here if possible/needed
+    logger.info("--- generate_news function finished execution (either success or error) ---")
+
 
 def create_demo():
     """Creates the Gradio interface"""
+    logger.info("--- Creating Gradio interface ---")
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 📰 NewsIA - AI News Generator")
         gr.Markdown("Create professional news articles from multiple information sources.")
@@ -990,6 +1266,7 @@ def create_demo():
 
         with gr.Row():
             with gr.Column(scale=2):
+                logger.info("Creating instruction input.")
                 instructions = gr.Textbox(
                     label="Instructions for the News Article",
                     placeholder="Enter specific instructions for generating your news article (e.g., focus on the economic impact)",
@@ -998,6 +1275,7 @@ def create_demo():
                 )
                 all_inputs.append(instructions)
 
+                logger.info("Creating facts input.")
                 facts = gr.Textbox(
                     label="Main Facts",
                     placeholder="Describe the most important facts the news should include (e.g., Event name, date, location, key people involved)",
@@ -1007,6 +1285,7 @@ def create_demo():
                 all_inputs.append(facts)
 
                 with gr.Row():
+                    logger.info("Creating size slider.")
                     size_slider = gr.Slider(
                         label="Approximate Length (words)",
                         minimum=100,
@@ -1016,6 +1295,7 @@ def create_demo():
                     )
                     all_inputs.append(size_slider)
 
+                    logger.info("Creating tone dropdown.")
                    tone_dropdown = gr.Dropdown(
                        label="Tone of the News Article",
                        choices=["neutral", "serious", "formal", "urgent", "investigative", "human-interest", "lighthearted"],
@@ -1026,6 +1306,7 @@ def create_demo():
            with gr.Column(scale=3):
                with gr.Tabs():
                    with gr.TabItem("📝 Documents"):
+                        logger.info("Creating document input tabs.")
                        gr.Markdown("Upload relevant documents (PDF, DOCX, XLSX, CSV). Max 5.")
                        doc_inputs = []
                        for i in range(1, 6):
@@ -1036,8 +1317,10 @@ def create_demo():
                            )
                            doc_inputs.append(doc_file)
                        all_inputs.extend(doc_inputs)
+                        logger.info(f"{len(doc_inputs)} document inputs created.")
 
                    with gr.TabItem("🔊 Audio/Video"):
+                        logger.info("Creating audio/video input tabs.")
                        gr.Markdown("Upload audio or video files for transcription (MP3, WAV, MP4, MOV, etc.). Max 5 sources.")
                        audio_video_inputs = []
                        for i in range(1, 6):
@@ -1062,8 +1345,11 @@ def create_demo():
                            audio_video_inputs.append(speaker_name)
                            audio_video_inputs.append(speaker_role)
                        all_inputs.extend(audio_video_inputs)
+                        logger.info(f"{len(audio_video_inputs)} audio/video inputs created (file + 2 textboxes per source).")
+
 
                    with gr.TabItem("🌐 URLs"):
+                        logger.info("Creating URL input tabs.")
                        gr.Markdown("Add URLs to relevant web pages or articles. Max 5.")
                        url_inputs = []
                        for i in range(1, 6):
@@ -1074,8 +1360,10 @@ def create_demo():
                            )
                            url_inputs.append(url_textbox)
                        all_inputs.extend(url_inputs)
+                        logger.info(f"{len(url_inputs)} URL inputs created.")
 
                    with gr.TabItem("📱 Social Media"):
+                        logger.info("Creating social media input tabs.")
                        gr.Markdown("Add URLs to social media posts (e.g., Twitter, YouTube, TikTok). Max 3.")
                        social_inputs = []
                        for i in range(1, 4):
@@ -1101,31 +1389,39 @@ def create_demo():
                            social_inputs.append(social_name_textbox)
                            social_inputs.append(social_context_textbox)
                        all_inputs.extend(social_inputs)
+                        logger.info(f"{len(social_inputs)} social media inputs created (URL + 2 textboxes per source).")
+
+        logger.info(f"Total number of input components collected: {len(all_inputs)}")
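+        # NOTE: generate_news unpacks all_inputs positionally (e.g.
+        # start_idx = i * num_social_inputs_per_source for the social slots), so the
+        # append/extend order above is a contract; adding or reordering components
+        # requires updating that index arithmetic as well.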
 
        with gr.Row():
+            logger.info("Creating generate and clear buttons.")
            generate_button = gr.Button("✨ Generate News Article", variant="primary")
            clear_button = gr.Button("🔄 Clear All Inputs")
 
        with gr.Tabs():
            with gr.TabItem("📄 Generated News Article"):
+                logger.info("Creating news output textbox.")
                news_output = gr.Textbox(
                    label="Draft News Article",
                    lines=20, # Increased lines
                    show_copy_button=True,
-                    value=""
+                    value="",
+                    interactive=False # Output is read-only
                )
 
            with gr.TabItem("🎙️ Source Transcriptions & Logs"):
+                logger.info("Creating transcriptions/log output textbox.")
                transcriptions_output = gr.Textbox(
                    label="Transcriptions and Processing Log",
                    lines=15, # Increased lines
                    show_copy_button=True,
-                    value=""
+                    value="",
+                    interactive=False # Output is read-only
                )
 
        # --- Event Handlers ---
-        # Define outputs
        outputs_list = [news_output, transcriptions_output]
+        logger.info("Setting up event handlers.")
 
        # Generate button click
        generate_button.click(
@@ -1133,30 +1429,37 @@ def create_demo():
            inputs=all_inputs, # Pass the consolidated list
            outputs=outputs_list
        )
+        logger.info("Generate button click handler set.")
 
        # Clear button click
        def clear_all_inputs_and_outputs():
-            # Return a list of default values matching the number and type of inputs + outputs
+            logger.info("--- Clear All button clicked ---")
            reset_values = []
+            # Generate default values based on input component types
            for input_comp in all_inputs:
                if isinstance(input_comp, (gr.Textbox, gr.Dropdown)):
                    reset_values.append("")
                elif isinstance(input_comp, gr.Slider):
                    reset_values.append(250) # Reset slider to default
                elif isinstance(input_comp, gr.File):
                    reset_values.append(None)
                else:
-                    reset_values.append(None) # Default for unknown/other types
+                    logger.warning(f"Unhandled input type for reset: {type(input_comp)}. Resetting to None.")
+                    reset_values.append(None)
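+            # (Sketch) The slider default is duplicated here; reading it from the
+            # component itself, e.g. reset_values.append(size_slider.value), would
+            # avoid drift if the UI default ever changes.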
 
-            # Add default values for the output fields
-            reset_values.extend(["", ""]) # Two Textbox outputs
+            # Add default values for the output fields (empty strings for textboxes)
+            reset_values.extend(["", ""])
+            logger.info(f"Generated {len(reset_values)} reset values for UI components.")
 
-            # Also reset the models in the background
-            model_manager.reset_models(force=True)
-            logger.info("UI cleared and models reset.")
+            # Also reset the models in the background (optional, but good for freeing resources)
+            try:
+                logger.info("Calling model reset from clear button handler.")
+                model_manager.reset_models(force=True)
+            except Exception as e:
+                logger.error(f"Error resetting models during clear operation: {e}")
+                logger.error(traceback.format_exc())
+            logger.info("--- Clear All operation finished ---")
            return reset_values
 
        clear_button.click(
@@ -1164,37 +1467,44 @@ def create_demo():
            inputs=None, # No inputs needed for the clear function itself
            outputs=all_inputs + outputs_list # The list of components to clear
        )
-
-        # Add event handler to reset models when the Gradio app closes or reloads (if possible)
-        # demo.unload(model_manager.reset_models, inputs=None, outputs=None) # Might not work reliably in Spaces
-
+        logger.info("Clear button click handler set.")
 
+    logger.info("--- Gradio interface creation complete ---")
    return demo
 
 
 if __name__ == "__main__":
-    logger.info("Starting NewsIA application...")
+    logger.info("--- Running main execution block ---")
 
-    # Optional: Pre-initialize Whisper on startup if desired and resources allow
-    # This can make the first transcription faster but uses GPU resources immediately.
-    # Consider enabling only if transcriptions are very common.
+    # Optional: Pre-initialize Whisper on startup (consider trade-offs)
    # try:
-    #     logger.info("Attempting to pre-initialize Whisper model...")
+    #     logger.info("Attempting to pre-initialize Whisper model on startup...")
    #     model_manager.initialize_whisper()
+    #     logger.info("Whisper pre-initialization successful.")
    # except Exception as e:
    #     logger.warning(f"Pre-initialization of Whisper model failed (will load on demand): {str(e)}")
+    #     logger.warning(traceback.format_exc())
 
    # Create the Gradio Demo
+    logger.info("Creating Gradio demo instance...")
    news_demo = create_demo()
+    logger.info("Gradio demo instance created.")
 
-    # Configure the queue - remove concurrency_count and max_size
-    # Use default queue settings, suitable for most Spaces environments
-    news_demo.queue()
+    # Configure the queue
+    logger.info("Configuring Gradio queue...")
+    news_demo.queue() # Use default queue settings
+    logger.info("Gradio queue configured.")
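+    # (Sketch) If request queuing needs tuning on a busier Space, Blocks.queue()
+    # also accepts parameters such as max_size (e.g. news_demo.queue(max_size=16));
+    # the defaults are kept here deliberately.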
 
    # Launch the Gradio app
    logger.info("Launching Gradio interface...")
-    news_demo.launch(
-        server_name="0.0.0.0", # Necessary for Docker/Spaces
-        server_port=7860,
-        # share=True # Share=True is often handled by Spaces automatically, can be removed
-        # debug=True # Enable for more detailed Gradio logs if needed
-    )
-    logger.info("NewsIA application finished.")
\ No newline at end of file
+    try:
+        news_demo.launch(
+            server_name="0.0.0.0", # Necessary for Docker/Spaces
+            server_port=7860,
+            # share=False, # Usually set by Spaces automatically
+            # debug=True # Enable for more Gradio-specific logs if needed
+        )
+        logger.info("Gradio launch called. Application running.")
+    except Exception as launch_err:
+        logger.error(f"!!! CRITICAL Error during Gradio launch: {launch_err}")
+        logger.error(traceback.format_exc())
+
+    logger.info("--- Main execution block finished ---") # May not be reached if launch blocks
\ No newline at end of file