FreestylerAI committed
Commit 9d0ffca · verified · 1 Parent(s): fbf0ed4

Update app.py

Files changed (1)
  1. app.py +736 -813
app.py CHANGED
@@ -1,813 +1,736 @@
1
- import os
2
- import json
3
- import pandas as pd
4
- import gradio as gr
5
- import spaces
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
7
- import torch
8
- import csv
9
- import yaml
10
- from typing import List, Dict, Any
11
- import random
12
- from pypdf import PdfReader
13
- import re
14
- import tempfile
15
- from huggingface_hub import HfApi
16
-
17
- # Configuration
18
- DEFAULT_MODEL = "databricks/dolly-v2-3b" # Smaller, more suitable for Spaces
19
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # Try to use CUDA if available
20
- MAX_NEW_TOKENS = 512
21
- TEMPERATURE = 0.7
22
- HF_TOKEN = os.environ.get("HF_TOKEN") if os.environ.get("HF_TOKEN") else None # Get token from environment variables
23
- MAX_RAM_GB = 45 # Set maximum RAM usage to 45GB (below the 70GB limit)
24
-
25
- # Create offload folder for model memory management
26
- os.makedirs("offload_folder", exist_ok=True)
27
-
28
- # Setup RAM monitoring
29
- def get_process_memory_usage():
30
- """Get the current memory usage of this process in GB"""
31
- import psutil
32
- process = psutil.Process(os.getpid())
33
- return process.memory_info().rss / (1024 * 1024 * 1024) # Convert to GB
34
-
35
- class PdfExtractor:
36
- """Extract text content from PDF files"""
37
-
38
- @staticmethod
39
- def extract_text_from_pdf(pdf_file):
40
- """Extract text from a PDF file"""
41
- try:
42
- reader = PdfReader(pdf_file)
43
- text = ""
44
-
45
- for page in reader.pages:
46
- text += page.extract_text() + "\n"
47
-
48
- return text
49
- except Exception as e:
50
- print(f"Error extracting text from PDF: {e}")
51
- return None
52
-
53
- @staticmethod
54
- def clean_text(text):
55
- """Clean and preprocess extracted text"""
56
- if not text:
57
- return ""
58
-
59
- # Replace multiple newlines with single newline
60
- text = re.sub(r'\n+', '\n', text)
61
-
62
- # Replace multiple spaces with single space
63
- text = re.sub(r'\s+', ' ', text)
64
-
65
- return text.strip()
66
-
67
- @staticmethod
68
- def chunk_text(text, max_chunk_size=1000, overlap=100):
69
- """Split text into chunks of specified size with overlap"""
70
- if not text:
71
- return []
72
-
73
- chunks = []
74
- start = 0
75
- text_length = len(text)
76
-
77
- while start < text_length:
78
- end = min(start + max_chunk_size, text_length)
79
-
80
- # If we're not at the end, try to break at a sentence or paragraph
81
- if end < text_length:
82
- # Look for sentence breaks (period, question mark, exclamation mark followed by space)
83
- sentence_break = max(
84
- text.rfind('. ', start, end),
85
- text.rfind('? ', start, end),
86
- text.rfind('! ', start, end),
87
- text.rfind('\n', start, end)
88
- )
89
-
90
- if sentence_break > start + max_chunk_size // 2:
91
- end = sentence_break + 1
92
-
93
- chunks.append(text[start:end].strip())
94
- start = end - overlap # Create overlap with previous chunk
95
-
96
- return chunks
97
-
98
- class SyntheticDataGenerator:
99
- def __init__(self, model_name=DEFAULT_MODEL):
100
- self.model_name = model_name
101
- self.model = None
102
- self.tokenizer = None
103
- self.fallback_models = [
104
- "databricks/dolly-v2-3b", # Smallest, most reliable model as primary fallback
105
- "EleutherAI/gpt-neo-1.3B", # Second fallback option
106
- ]
107
- # Don't try to load the model in init - we'll load it when needed
108
- # This prevents initialization errors from blocking the app startup
109
-
110
- def load_model(self):
111
- """Load the specified model or fall back to a smaller model if loading fails"""
112
- # Clear CUDA cache if using GPU to prevent memory fragmentation
113
- if torch.cuda.is_available():
114
- torch.cuda.empty_cache()
115
- # Try initializing CUDA explicitly to catch early errors
116
- try:
117
- torch.tensor([1.0], device="cuda")
118
- except Exception as e:
119
- print(f"CUDA initialization error: {e}")
120
-
121
- models_to_try = [self.model_name]
122
-
123
- # Add fallback models only if the requested model isn't already in the fallback list
124
- if self.model_name not in self.fallback_models:
125
- models_to_try.extend(self.fallback_models)
126
-
127
- for model_name in models_to_try:
128
- try:
129
- print(f"Loading model {model_name} on {DEVICE}...")
130
-
131
- # Add token for authentication if available
132
- tokenizer_kwargs = {}
133
- model_kwargs = {
134
- "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
135
- "device_map": "auto" if torch.cuda.is_available() else None,
136
- "low_cpu_mem_usage": True, # Added to reduce memory usage on CPU
137
- "offload_folder": "offload_folder" # Add offload folder for large models
138
- }
139
-
140
- # Handle potential CUDA out-of-memory errors with smaller settings
141
- if torch.cuda.is_available():
142
- try:
143
- # Check available GPU memory
144
- free_memory, total_memory = torch.cuda.mem_get_info()
145
- free_memory_gb = free_memory / (1024**3)
146
- total_memory_gb = total_memory / (1024**3)
147
- print(f"GPU memory: {free_memory_gb:.2f}GB free out of {total_memory_gb:.2f}GB total")
148
-
149
- # If we're running low on memory (this is common in Spaces), use more aggressive memory saving
150
- model_kwargs["max_memory"] = {0: f"{max(free_memory_gb*0.8, 0.5)}GB", "cpu": "8GB"}
151
-
152
- # For smaller GPUs or when memory is constrained, use more aggressive offloading
153
- if free_memory_gb < 4.0: # Less than 4GB free
154
- print("Low GPU memory detected. Using CPU offloading...")
155
- # More conservative memory map to prevent ZeroGPU errors
156
- model_kwargs["device_map"] = "auto"
157
- model_kwargs["offload_state_dict"] = True # More aggressive offloading
158
- except Exception as memory_check_error:
159
- print(f"Error checking GPU memory: {memory_check_error}")
160
- # Continue with default settings but with safeguards
161
- model_kwargs["device_map"] = "auto" # Let the library decide the best mapping
162
-
163
- # Add token for authentication if available and model is gated
164
- if HF_TOKEN:
165
- tokenizer_kwargs["token"] = HF_TOKEN
166
- model_kwargs["token"] = HF_TOKEN
167
- print("Using Hugging Face token for authentication")
168
-
169
- # Load tokenizer with safeguards against ZeroGPU issues
170
- try:
171
- self.tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
172
- except Exception as tokenizer_error:
173
- print(f"Error loading tokenizer: {tokenizer_error}")
174
- # Try loading with additional safety settings
175
- tokenizer_kwargs["local_files_only"] = False
176
- tokenizer_kwargs["revision"] = "main"
177
- self.tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
178
-
179
- # Load the model with ZeroGPU error prevention
180
- try:
181
- self.model = AutoModelForCausalLM.from_pretrained(
182
- model_name,
183
- **model_kwargs
184
- )
185
- except RuntimeError as e:
186
- if "CUDA" in str(e) or "GPU" in str(e) or "out of memory" in str(e):
187
- print(f"CUDA error loading model: {e}")
188
- # Fall back to CPU if GPU fails
189
- print("Falling back to CPU for model loading")
190
- model_kwargs["device_map"] = {"": "cpu"}
191
- model_kwargs["torch_dtype"] = torch.float32
192
- self.model = AutoModelForCausalLM.from_pretrained(
193
- model_name,
194
- **model_kwargs
195
- )
196
- else:
197
- raise
198
-
199
- # Ensure model is on the right device if not using device_map="auto"
200
- if not torch.cuda.is_available():
201
- self.model = self.model.to(DEVICE)
202
-
203
- # If we loaded a fallback model instead of the requested one, update the model_name
204
- if model_name != self.model_name:
205
- print(f"Successfully loaded fallback model {model_name} instead of {self.model_name}")
206
- self.model_name = model_name
207
- else:
208
- print(f"Model loaded successfully on {DEVICE}")
209
-
210
- return True
211
- except Exception as e:
212
- print(f"Error loading model {model_name}: {e}")
213
- self.model = None
214
- self.tokenizer = None
215
- # Continue to the next model in the list
216
-
217
- # If we get here, all models failed
218
- print("All models failed to load")
219
- return False
220
-
221
- def generate_qa_prompt(self, context, num_questions=3, include_tags=True, difficulty_levels=True):
222
- """Generate a prompt for creating Q&A pairs from context."""
223
- tag_instruction = ""
224
- if include_tags:
225
- tag_instruction = "Add 1-3 tags for each question that categorize the topic or subject matter."
226
-
227
- difficulty_instruction = ""
228
- if difficulty_levels:
229
- difficulty_instruction = "For each question, assign a difficulty level (easy, medium, or hard)."
230
-
231
- prompt = f"""Task: Based on the following text, generate {num_questions} question and answer pairs that would be useful for comprehension testing or knowledge assessment.
232
-
233
- CONTEXT:
234
- {context}
235
-
236
- For each question:
237
- 1. Write a clear, specific question about the information in the text
238
- 2. Provide the correct answer to the question, citing relevant details from the text
239
- 3. {tag_instruction}
240
- 4. {difficulty_instruction}
241
-
242
- Format each Q&A pair as a JSON object with the following structure:
243
- {{
244
- "question": "The question text",
245
- "answer": "The answer text",
246
- "tags": ["tag1", "tag2"],
247
- "difficulty": "easy/medium/hard"
248
- }}
249
-
250
- Return all Q&A pairs in a JSON array.
251
- """
252
- return prompt
253
-
254
- def generate_data(self, prompt, num_samples=1):
255
- """Generate synthetic data using the loaded model."""
256
- if not self.model or not self.tokenizer:
257
- return ["Error: Model not loaded properly. Please try again with a different model."]
258
-
259
- outputs = []
260
- for sample_idx in range(num_samples):
261
- try:
262
- # Clear CUDA cache before generating to free up memory
263
- if torch.cuda.is_available():
264
- torch.cuda.empty_cache()
265
-
266
- # ZeroGPU errors often occur in generate() calls
267
- # To mitigate this, try multiple approaches in sequence
268
- inputs = self.tokenizer(prompt, return_tensors="pt").to(DEVICE)
269
-
270
- try:
271
- # First try: Standard generation with conservative settings
272
- with torch.no_grad():
273
- output = self.model.generate(
274
- **inputs,
275
- max_new_tokens=MAX_NEW_TOKENS,
276
- temperature=TEMPERATURE,
277
- do_sample=True,
278
- pad_token_id=self.tokenizer.eos_token_id,
279
- num_beams=1, # Use greedy decoding instead of beam search
280
- early_stopping=True,
281
- no_repeat_ngram_size=3 # Prevent repetition
282
- )
283
-
284
- decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=True)
285
- except (RuntimeError, Exception) as e:
286
- if "CUDA" in str(e) or "GPU" in str(e) or "ZeroGPU" in str(e):
287
- print(f"GPU error during generation: {e}")
288
- print("Falling back to CPU generation...")
289
-
290
- # Move everything to CPU
291
- inputs = {k: v.to('cpu') for k, v in inputs.items()}
292
-
293
- # Create CPU copy of the model if we were using GPU
294
- if torch.cuda.is_available():
295
- # Temporarily move model to CPU for this generation
296
- model_cpu = self.model.to('cpu')
297
-
298
- with torch.no_grad():
299
- output = model_cpu.generate(
300
- **inputs,
301
- max_new_tokens=MAX_NEW_TOKENS,
302
- temperature=TEMPERATURE,
303
- do_sample=True,
304
- pad_token_id=self.tokenizer.eos_token_id,
305
- num_return_sequences=1,
306
- max_length=MAX_NEW_TOKENS + inputs['input_ids'].shape[1]
307
- )
308
- decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=True)
309
-
310
- # Move model back to CUDA for future calls
311
- self.model = self.model.to(DEVICE)
312
- else:
313
- # Already on CPU, try with reduced parameters
314
- with torch.no_grad():
315
- output = self.model.generate(
316
- **inputs,
317
- max_new_tokens=min(256, MAX_NEW_TOKENS), # Reduce token count
318
- temperature=0.5, # Lower temperature
319
- do_sample=False, # No sampling
320
- num_return_sequences=1,
321
- pad_token_id=self.tokenizer.eos_token_id
322
- )
323
- decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=True)
324
- else:
325
- # Re-raise non-CUDA errors
326
- raise
327
-
328
- # Extract only the generated part (remove prompt)
329
- prompt_text = self.tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)
330
- generated_text = decoded_output[len(prompt_text):].strip()
331
- outputs.append(generated_text)
332
-
333
- # Clear CUDA cache between samples
334
- if torch.cuda.is_available():
335
- torch.cuda.empty_cache()
336
-
337
- except Exception as e:
338
- error_msg = f"Error generating sample {sample_idx+1}: {str(e)}"
339
- print(error_msg)
340
- outputs.append(f"Error: {error_msg}")
341
-
342
- return outputs
343
-
344
- def parse_json_data(self, generated_text):
345
- """Extract and parse JSON from generated text."""
346
- try:
347
- # Find JSON-like content (between [ and ])
348
- start_idx = generated_text.find('[')
349
- end_idx = generated_text.rfind(']') + 1
350
-
351
- if start_idx >= 0 and end_idx > start_idx:
352
- json_str = generated_text[start_idx:end_idx]
353
- return json.loads(json_str)
354
-
355
- # Try to find single object format
356
- start_idx = generated_text.find('{')
357
- end_idx = generated_text.rfind('}') + 1
358
-
359
- if start_idx >= 0 and end_idx > start_idx:
360
- json_str = generated_text[start_idx:end_idx]
361
- return json.loads(json_str)
362
-
363
- print(f"Could not find JSON content in: {generated_text}")
364
- return None
365
- except json.JSONDecodeError as e:
366
- print(f"JSON parse error: {e}")
367
- print(f"Problematic text: {generated_text}")
368
-
369
- # Try to find and fix common JSON formatting errors
370
- try:
371
- # Replace single quotes with double quotes
372
- json_str = generated_text[start_idx:end_idx].replace("'", "\"")
373
- return json.loads(json_str)
374
- except:
375
- pass
376
-
377
- # If still failing, try to extract individual JSON objects
378
- try:
379
- pattern = r'\{[^{}]*\}'
380
- matches = re.findall(pattern, generated_text)
381
- if matches:
382
- results = []
383
- for match in matches:
384
- try:
385
- # Replace single quotes with double quotes
386
- fixed_match = match.replace("'", "\"")
387
- obj = json.loads(fixed_match)
388
- results.append(obj)
389
- except:
390
- continue
391
- if results:
392
- return results
393
- except:
394
- pass
395
-
396
- return None
397
-
398
- def generate_qa_from_pdf_chunk(self, chunk, num_questions=3, include_tags=True, difficulty_levels=True):
399
- """Generate Q&A pairs from a PDF text chunk."""
400
- if not self.model or not self.tokenizer:
401
- return [], "Error: Model not loaded properly. Please try again with a different model."
402
-
403
- if not chunk or len(chunk.strip()) < 100: # Skip very small chunks
404
- return [], "Chunk too small to generate meaningful Q&A pairs."
405
-
406
- prompt = self.generate_qa_prompt(chunk, num_questions, include_tags, difficulty_levels)
407
- raw_outputs = self.generate_data(prompt, num_samples=1)
408
- raw_output = raw_outputs[0]
409
-
410
- parsed_data = self.parse_json_data(raw_output)
411
-
412
- # Ensure parsed data is a list
413
- if parsed_data and isinstance(parsed_data, dict):
414
- parsed_data = [parsed_data]
415
-
416
- # Return both the parsed data and raw output for debugging
417
- return parsed_data, raw_output
418
-
419
- def format_data_preview(data):
420
- """Format the data for preview in the UI."""
421
- if isinstance(data, list):
422
- if len(data) > 0 and isinstance(data[0], dict):
423
- # Convert list of dicts to DataFrame for better display
424
- return pd.DataFrame(data).to_string()
425
- else:
426
- return json.dumps(data, indent=2)
427
- elif isinstance(data, dict):
428
- return json.dumps(data, indent=2)
429
- else:
430
- return str(data)
431
-
432
- def save_data(data, format, filename_prefix):
433
- """Save data to a file in the specified format."""
434
- os.makedirs("synthetic_data", exist_ok=True)
435
- timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
436
- filename = f"synthetic_data/{filename_prefix}_{timestamp}"
437
-
438
- if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
439
- df = pd.DataFrame(data)
440
-
441
- if format.lower() == "csv":
442
- full_filename = f"{filename}.csv"
443
- df.to_csv(full_filename, index=False)
444
- elif format.lower() == "json":
445
- full_filename = f"{filename}.json"
446
- with open(full_filename, "w") as f:
447
- json.dump(data, f, indent=2)
448
- elif format.lower() == "excel":
449
- full_filename = f"{filename}.xlsx"
450
- df.to_excel(full_filename, index=False)
451
- else:
452
- full_filename = f"{filename}.txt"
453
- with open(full_filename, "w") as f:
454
- f.write(str(data))
455
- else:
456
- full_filename = f"{filename}.{format.lower()}"
457
- with open(full_filename, "w") as f:
458
- if format.lower() == "json":
459
- json.dump(data, f, indent=2)
460
- else:
461
- f.write(str(data))
462
-
463
- return full_filename
464
-
465
- def load_models():
466
- """Return a list of available models."""
467
- return [
468
- "databricks/dolly-v2-3b",
469
- "databricks/dolly-v2-7b",
470
- "EleutherAI/gpt-neo-1.3B",
471
- "EleutherAI/gpt-neo-2.7B",
472
- "tiiuae/falcon-7b-instruct"
473
- ]
474
-
475
- @spaces.GPU
476
- def process_pdf_generate_qa(pdf_file, model_name, num_questions_per_chunk, include_tags, include_difficulty, output_file_format, progress=None):
477
- """Process a PDF file and generate Q&A pairs from its content."""
478
- if pdf_file is None:
479
- return None, "Error: No PDF file uploaded", "", "No file provided"
480
-
481
- try:
482
- # Check RAM usage at start
483
- current_ram_usage = get_process_memory_usage()
484
- print(f"Starting RAM usage: {current_ram_usage:.2f}GB")
485
-
486
- # Clear CUDA cache before starting
487
- if torch.cuda.is_available():
488
- torch.cuda.empty_cache()
489
-
490
- # Initialize extractor and generator
491
- extractor = PdfExtractor()
492
- generator = SyntheticDataGenerator(model_name)
493
-
494
- # Wrap model loading in try-except to handle errors
495
- try:
496
- load_success = generator.load_model()
497
- if not load_success:
498
- return None, "Error: Failed to load the model. Please try again with a different model.", "", "Model loading failed"
499
- except Exception as e:
500
- if "ZeroGPU" in str(e) or "GPU task aborted" in str(e) or "CUDA" in str(e):
501
- print(f"GPU error during model loading: {e}. Trying with a smaller model...")
502
- # If we get a ZeroGPU error, immediately try the smallest model
503
- generator.model_name = "EleutherAI/gpt-neo-1.3B" # Use smallest model as emergency fallback
504
- load_success = generator.load_model()
505
- if not load_success:
506
- return None, "Error: Failed to load any model even after fallback. Please try again later.", "", "Model loading failed"
507
- else:
508
- # Re-raise other errors
509
- raise
510
-
511
- # Check RAM usage after model loading
512
- ram_after_model = get_process_memory_usage()
513
- print(f"RAM usage after model loading: {ram_after_model:.2f}GB")
514
-
515
- # Save PDF temporarily if it's a file object
516
- if hasattr(pdf_file, 'name'):
517
- # It's already a file path
518
- pdf_path = pdf_file.name
519
- else:
520
- # Create a temporary file
521
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
522
- tmp.write(pdf_file)
523
- pdf_path = tmp.name
524
-
525
- # Extract text from PDF
526
- pdf_text = extractor.extract_text_from_pdf(pdf_path)
527
-
528
- if not pdf_text:
529
- return None, "Failed to extract text from PDF", "", "No data generated"
530
-
531
- # Clean and chunk the text - reduce chunk size to use less memory
532
- cleaned_text = extractor.clean_text(pdf_text)
533
- chunks = extractor.chunk_text(cleaned_text, max_chunk_size=400, overlap=30)
534
-
535
- # Check RAM after PDF processing
536
- ram_after_pdf = get_process_memory_usage()
537
- print(f"RAM usage after PDF processing: {ram_after_pdf:.2f}GB, found {len(chunks)} chunks")
538
-
539
- # If we're approaching the RAM limit already, reduce batch size
540
- batch_size = 3 # Default
541
- if ram_after_pdf > MAX_RAM_GB * 0.7: # If already using 70% of our limit
542
- batch_size = 1 # Process one chunk at a time
543
- print(f"High RAM usage detected ({ram_after_pdf:.2f}GB), reducing batch size to 1")
544
- elif ram_after_pdf > MAX_RAM_GB * 0.5: # If using 50% of our limit
545
- batch_size = 2 # Process two chunks at a time
546
- print(f"Moderate RAM usage detected ({ram_after_pdf:.2f}GB), reducing batch size to 2")
547
-
548
- # Generate Q&A pairs for each chunk
549
- all_qa_pairs = []
550
- all_raw_outputs = []
551
-
552
- total_chunks = len(chunks)
553
-
554
- # Process chunks in smaller batches to avoid memory buildup
555
- for i in range(0, total_chunks, batch_size):
556
- # Get the current batch of chunks
557
- batch_chunks = chunks[i:min(i+batch_size, total_chunks)]
558
-
559
- # Process each chunk in the batch
560
- for j, chunk in enumerate(batch_chunks):
561
- chunk_index = i + j
562
-
563
- if progress is not None:
564
- progress(chunk_index / total_chunks, f"Processing chunk {chunk_index+1}/{total_chunks}")
565
-
566
- # Check if we're approaching RAM limit
567
- current_ram = get_process_memory_usage()
568
- if current_ram > MAX_RAM_GB * 0.9: # Over 90% of our limit
569
- print(f"WARNING: High RAM usage detected: {current_ram:.2f}GB - force releasing memory")
570
- import gc
571
- gc.collect() # Force garbage collection
572
- if torch.cuda.is_available():
573
- torch.cuda.empty_cache()
574
-
575
- # If still too high after garbage collection, abort batch processing
576
- current_ram = get_process_memory_usage()
577
- if current_ram > MAX_RAM_GB * 0.95: # Still dangerously high
578
- print(f"CRITICAL: RAM usage too high ({current_ram:.2f}GB), stopping processing")
579
- break
580
-
581
- # Clear CUDA cache between chunks
582
- if torch.cuda.is_available():
583
- torch.cuda.empty_cache()
584
-
585
- try:
586
- qa_pairs, raw_output = generator.generate_qa_from_pdf_chunk(
587
- chunk,
588
- num_questions=num_questions_per_chunk,
589
- include_tags=include_tags,
590
- difficulty_levels=include_difficulty
591
- )
592
- except Exception as e:
593
- error_type = str(e)
594
- if "CUDA" in error_type or "GPU" in error_type or "ZeroGPU" in error_type:
595
- print(f"GPU error during generation for chunk {chunk_index+1}: {e}")
596
- # Fall back to CPU for this specific generation
597
- raw_output = f"Error in chunk {chunk_index+1}: {str(e)}. Skipping..."
598
- qa_pairs = None
599
- elif "memory" in error_type.lower() or "ram" in error_type.lower():
600
- print(f"Memory error processing chunk {chunk_index+1}: {e}")
601
- # Force garbage collection and skip chunk
602
- import gc
603
- gc.collect()
604
- if torch.cuda.is_available():
605
- torch.cuda.empty_cache()
606
- raw_output = f"Memory error in chunk {chunk_index+1}: {str(e)}. Skipping..."
607
- qa_pairs = None
608
- else:
609
- # For other errors, just log and continue
610
- print(f"Error processing chunk {chunk_index+1}: {e}")
611
- raw_output = f"Error in chunk {chunk_index+1}: {str(e)}"
612
- qa_pairs = None
613
-
614
- if qa_pairs:
615
- all_qa_pairs.extend(qa_pairs)
616
- all_raw_outputs.append(raw_output)
617
-
618
- # Check RAM usage after processing this chunk
619
- current_ram = get_process_memory_usage()
620
- print(f"RAM after chunk {chunk_index+1}: {current_ram:.2f}GB")
621
-
622
- # Do a thorough cleanup after each batch
623
- if torch.cuda.is_available():
624
- torch.cuda.empty_cache()
625
-
626
- # Force garbage collection between batches
627
- import gc
628
- gc.collect()
629
-
630
- # Check if we need to abort due to memory constraints
631
- current_ram = get_process_memory_usage()
632
- if current_ram > MAX_RAM_GB:
633
- print(f"WARNING: Exceeding RAM limit ({current_ram:.2f}GB). Stopping further processing.")
634
- if progress is not None:
635
- progress(1.0, f"Stopped early due to high memory usage ({current_ram:.2f}GB)")
636
- break
637
-
638
- if progress is not None:
639
- progress(1.0, "Finished processing")
640
-
641
- # Final cache clear and garbage collection
642
- if torch.cuda.is_available():
643
- torch.cuda.empty_cache()
644
- import gc
645
- gc.collect()
646
-
647
- if not all_qa_pairs:
648
- return None, "Failed to generate Q&A pairs", "\n\n".join(all_raw_outputs), "No data generated"
649
-
650
- # Save data to file
651
- filename = save_data(
652
- all_qa_pairs,
653
- output_file_format,
654
- "qa_dataset"
655
- )
656
-
657
- # Format for display
658
- formatted_data = format_data_preview(all_qa_pairs)
659
-
660
- # Final memory report
661
- final_ram = get_process_memory_usage()
662
- print(f"Final RAM usage: {final_ram:.2f}GB")
663
-
664
- return all_qa_pairs, formatted_data, "\n\n".join(all_raw_outputs), f"Data saved to {filename}"
665
- except Exception as e:
666
- error_msg = f"Error processing PDF: {str(e)}"
667
- print(error_msg)
668
- import traceback
669
- print(traceback.format_exc())
670
- return None, error_msg, "", "Processing failed"
671
-
672
- # Set up the Gradio interface
673
- def create_interface():
674
- with gr.Blocks(title="PDF Q&A Dataset Generator") as app:
675
- gr.Markdown("# 📚 PDF Q&A Dataset Generator")
676
- gr.Markdown("""
677
- Generate question & answer datasets from PDF documents using instruction-tuned language models.
678
- Perfect for creating educational resources, quiz materials, or training data for Q&A systems.
679
- """)
680
-
681
- with gr.Tabs() as tabs:
682
- with gr.TabItem("Generate Q&A Dataset"):
683
- with gr.Row():
684
- with gr.Column(scale=1):
685
- pdf_file = gr.File(
686
- label="Upload PDF",
687
- file_types=[".pdf"],
688
- type="binary"
689
- )
690
-
691
- model_dropdown = gr.Dropdown(
692
- choices=load_models(),
693
- value=DEFAULT_MODEL,
694
- label="Model"
695
- )
696
-
697
- num_questions = gr.Slider(
698
- minimum=1,
699
- maximum=5,
700
- value=3,
701
- step=1,
702
- label="Questions per Section"
703
- )
704
-
705
- include_tags = gr.Checkbox(
706
- value=True,
707
- label="Include Tags"
708
- )
709
-
710
- include_difficulty = gr.Checkbox(
711
- value=True,
712
- label="Include Difficulty Levels"
713
- )
714
-
715
- output_file_format = gr.Radio(
716
- choices=["json", "csv", "excel"],
717
- value="json",
718
- label="Save File Format"
719
- )
720
-
721
- generate_btn = gr.Button("Generate Q&A Dataset", variant="primary")
722
-
723
- progress_bar = gr.Progress()
724
-
725
- with gr.Column(scale=2):
726
- with gr.Tab("Parsed Data"):
727
- parsed_data_output = gr.JSON(label="Generated Q&A Pairs")
728
- formatted_data_output = gr.Textbox(
729
- label="Formatted Preview",
730
- lines=15
731
- )
732
-
733
- with gr.Tab("Raw Output"):
734
- raw_output = gr.Textbox(
735
- label="Raw Model Output",
736
- lines=15
737
- )
738
-
739
- file_output = gr.Textbox(label="File Output")
740
-
741
- with gr.TabItem("Documentation"):
742
- gr.Markdown("""
743
- ## How to Use
744
-
745
- 1. **Upload a PDF**: Select a PDF document containing the content you want to generate questions from.
746
- 2. **Select a model**: Choose an instruction-tuned language model from the dropdown.
747
- 3. **Configure settings**:
748
- - Set the number of questions to generate per text section
749
- - Choose whether to include tags and difficulty levels
750
- - Select your preferred output file format
751
- 4. **Generate dataset**: Click the "Generate Q&A Dataset" button to create your dataset.
752
-
753
- ## About This App
754
-
755
- This app uses instruction-tuned language models to generate question and answer pairs from PDF documents. It:
756
-
757
- 1. Extracts text from the uploaded PDF
758
- 2. Splits the text into manageable chunks
759
- 3. Generates questions, answers, tags, and difficulty levels for each chunk
760
- 4. Combines all Q&A pairs into a comprehensive dataset
761
-
762
- ### Features:
763
- - Automatic text extraction from PDFs
764
- - Smart text chunking to maintain context
765
- - Customizable number of questions per chunk
766
- - Optional tagging and difficulty classification
767
- - Multiple output formats (JSON, CSV, Excel)
768
-
769
- ### Use Cases:
770
- - Create educational resources and quiz materials
771
- - Generate training data for Q&A systems
772
- - Build flashcard datasets for studying
773
- - Develop content for educational applications
774
- """)
775
-
776
- with gr.TabItem("Status"):
777
- gr.Markdown("""
778
- ## System Status
779
-
780
- This app runs on CPU mode. Some larger models might be slower to load and generate content.
781
- If you encounter any issues with a specific model, try switching to a smaller model like `databricks/dolly-v2-3b`.
782
-
783
- ### Troubleshooting
784
-
785
- - If the app seems unresponsive after clicking "Generate", please be patient - model loading may take time.
786
- - If you get an error about model loading, try refreshing the page and selecting a different model.
787
- - Not all PDFs can be properly processed - if text extraction fails, try with a different PDF.
788
- """)
789
-
790
- # Event handler for generate button
791
- generate_btn.click(
792
- process_pdf_generate_qa,
793
- inputs=[
794
- pdf_file,
795
- model_dropdown,
796
- num_questions,
797
- include_tags,
798
- include_difficulty,
799
- output_file_format
800
- ],
801
- outputs=[parsed_data_output, formatted_data_output, raw_output, file_output],
802
- show_progress=True
803
- )
804
-
805
- return app
806
-
807
- # Export the app for Hugging Face Spaces
808
- app = create_interface()
809
-
810
- # Launch the app depending on the environment
811
- if __name__ == "__main__":
812
- app.launch()
813
-
 
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ import gradio as gr
5
+ import spaces
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+ import torch
8
+ import csv
9
+ import yaml
10
+ from typing import List, Dict, Any
11
+ import random
12
+ from pypdf import PdfReader
13
+ import re
14
+ import tempfile
15
+ from huggingface_hub import HfApi
16
+
17
+ # Configuration
18
+ DEFAULT_MODEL = "tiiuae/falcon-7b-instruct" # Use Falcon-7B as the default model
19
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # Try to use CUDA if available
20
+ MAX_NEW_TOKENS = 512
21
+ TEMPERATURE = 0.7
22
+ HF_TOKEN = os.environ.get("HF_TOKEN") if os.environ.get("HF_TOKEN") else None # Get token from environment variables
23
+ MAX_RAM_GB = 45 # Set maximum RAM usage to 45GB (below the 70GB limit)
24
+
25
+ # Create offload folder for model memory management
26
+ os.makedirs("offload_folder", exist_ok=True)
27
+
28
+ # Setup RAM monitoring
29
+ def get_process_memory_usage():
30
+ """Get the current memory usage of this process in GB"""
31
+ import psutil
32
+ process = psutil.Process(os.getpid())
33
+ return process.memory_info().rss / (1024 * 1024 * 1024) # Convert to GB
34
+
35
+ class PdfExtractor:
36
+ """Extract text content from PDF files"""
37
+
38
+ @staticmethod
39
+ def extract_text_from_pdf(pdf_file):
40
+ """Extract text from a PDF file"""
41
+ try:
42
+ reader = PdfReader(pdf_file)
43
+ text = ""
44
+
45
+ for page in reader.pages:
46
+ text += page.extract_text() + "\n"
47
+
48
+ return text
49
+ except Exception as e:
50
+ print(f"Error extracting text from PDF: {e}")
51
+ return None
52
+
53
+ @staticmethod
54
+ def clean_text(text):
55
+ """Clean and preprocess extracted text"""
56
+ if not text:
57
+ return ""
58
+
59
+ # Replace multiple newlines with single newline
60
+ text = re.sub(r'\n+', '\n', text)
61
+
62
+ # Replace multiple spaces with single space
63
+ text = re.sub(r'\s+', ' ', text)
64
+
65
+ return text.strip()
66
+
67
+ @staticmethod
68
+ def chunk_text(text, max_chunk_size=1000, overlap=100):
69
+ """Split text into chunks of specified size with overlap"""
70
+ if not text:
71
+ return []
72
+
73
+ chunks = []
74
+ start = 0
75
+ text_length = len(text)
76
+
77
+ while start < text_length:
78
+ end = min(start + max_chunk_size, text_length)
79
+
80
+ # If we're not at the end, try to break at a sentence or paragraph
81
+ if end < text_length:
82
+ # Look for sentence breaks (period, question mark, exclamation mark followed by space)
83
+ sentence_break = max(
84
+ text.rfind('. ', start, end),
85
+ text.rfind('? ', start, end),
86
+ text.rfind('! ', start, end),
87
+ text.rfind('\n', start, end)
88
+ )
89
+
90
+ if sentence_break > start + max_chunk_size // 2:
91
+ end = sentence_break + 1
92
+
93
+ chunks.append(text[start:end].strip())
94
+ start = end - overlap # Create overlap with previous chunk
95
+
96
+ return chunks
97
+
98
+ class SyntheticDataGenerator:
99
+ def __init__(self, model_name=DEFAULT_MODEL):
100
+ self.model_name = model_name
101
+ self.model = None
102
+ self.tokenizer = None
103
+ self.load_model() # Load the model directly during initialization
104
+
105
+ def load_model(self):
106
+ """Load the specified model."""
107
+ # Clear CUDA cache if using GPU to prevent memory fragmentation
108
+ if torch.cuda.is_available():
109
+ torch.cuda.empty_cache()
110
+
111
+ try:
112
+ print(f"Loading model {self.model_name} on {DEVICE}...")
113
+
114
+ # Add token for authentication if available
115
+ tokenizer_kwargs = {}
116
+ model_kwargs = {
117
+ "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
118
+ "device_map": "auto" if torch.cuda.is_available() else None,
119
+ "low_cpu_mem_usage": True, # Added to reduce memory usage on CPU
120
+ "offload_folder": "offload_folder" # Add offload folder for large models
121
+ }
122
+
123
+ if HF_TOKEN:
124
+ tokenizer_kwargs["token"] = HF_TOKEN
125
+ model_kwargs["token"] = HF_TOKEN
126
+ print("Using Hugging Face token for authentication")
127
+
128
+ # Load tokenizer
129
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, **tokenizer_kwargs)
130
+
131
+ # Load the model
132
+ self.model = AutoModelForCausalLM.from_pretrained(
133
+ self.model_name,
134
+ **model_kwargs
135
+ )
136
+
137
+ # Ensure model is on the right device if not using device_map="auto"
138
+ if not torch.cuda.is_available():
139
+ self.model = self.model.to(DEVICE)
140
+
141
+ print(f"Model {self.model_name} loaded successfully on {DEVICE}")
142
+ except Exception as e:
143
+ print(f"Error loading model {self.model_name}: {e}")
144
+ self.model = None
145
+ self.tokenizer = None
146
+ raise
147
+
148
+ def generate_qa_prompt(self, context, num_questions=3, include_tags=True, difficulty_levels=True):
149
+ """Generate a prompt for creating Q&A pairs from context."""
150
+ tag_instruction = ""
151
+ if include_tags:
152
+ tag_instruction = "Add 1-3 tags for each question that categorize the topic or subject matter."
153
+
154
+ difficulty_instruction = ""
155
+ if difficulty_levels:
156
+ difficulty_instruction = "For each question, assign a difficulty level (easy, medium, or hard)."
157
+
158
+ prompt = f"""Task: Based on the following text, generate {num_questions} question and answer pairs that would be useful for comprehension testing or knowledge assessment.
159
+
160
+ CONTEXT:
161
+ {context}
162
+
163
+ For each question:
164
+ 1. Write a clear, specific question about the information in the text
165
+ 2. Provide the correct answer to the question, citing relevant details from the text
166
+ 3. {tag_instruction}
167
+ 4. {difficulty_instruction}
168
+
169
+ Format each Q&A pair as a JSON object with the following structure:
170
+ {{
171
+ "question": "The question text",
172
+ "answer": "The answer text",
173
+ "tags": ["tag1", "tag2"],
174
+ "difficulty": "easy/medium/hard"
175
+ }}
176
+
177
+ Return all Q&A pairs in a JSON array.
178
+ """
179
+ return prompt
180
+
181
+ def generate_data(self, prompt, num_samples=1):
182
+ """Generate synthetic data using the loaded model."""
183
+ if not self.model or not self.tokenizer:
184
+ return ["Error: Model not loaded properly. Please try again with a different model."]
185
+
186
+ outputs = []
187
+ for sample_idx in range(num_samples):
188
+ try:
189
+ # Clear CUDA cache before generating to free up memory
190
+ if torch.cuda.is_available():
191
+ torch.cuda.empty_cache()
192
+
193
+ # ZeroGPU errors often occur in generate() calls
194
+ # To mitigate this, try multiple approaches in sequence
195
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(DEVICE)
196
+
197
+ try:
198
+ # First try: Standard generation with conservative settings
199
+ with torch.no_grad():
200
+ output = self.model.generate(
201
+ **inputs,
202
+ max_new_tokens=MAX_NEW_TOKENS,
203
+ temperature=TEMPERATURE,
204
+ do_sample=True,
205
+ pad_token_id=self.tokenizer.eos_token_id,
206
+ num_beams=1, # Use greedy decoding instead of beam search
207
+ early_stopping=True,
208
+ no_repeat_ngram_size=3 # Prevent repetition
209
+ )
210
+
211
+ decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=True)
212
+ except (RuntimeError, Exception) as e:
213
+ if "CUDA" in str(e) or "GPU" in str(e) or "ZeroGPU" in str(e):
214
+ print(f"GPU error during generation: {e}")
215
+ print("Falling back to CPU generation...")
216
+
217
+ # Move everything to CPU
218
+ inputs = {k: v.to('cpu') for k, v in inputs.items()}
219
+
220
+ # Create CPU copy of the model if we were using GPU
221
+ if torch.cuda.is_available():
222
+ # Temporarily move model to CPU for this generation
223
+ model_cpu = self.model.to('cpu')
224
+
225
+ with torch.no_grad():
226
+ output = model_cpu.generate(
227
+ **inputs,
228
+ max_new_tokens=MAX_NEW_TOKENS,
229
+ temperature=TEMPERATURE,
230
+ do_sample=True,
231
+ pad_token_id=self.tokenizer.eos_token_id,
232
+ num_return_sequences=1,
233
+ max_length=MAX_NEW_TOKENS + inputs['input_ids'].shape[1]
234
+ )
235
+ decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=True)
236
+
237
+ # Move model back to CUDA for future calls
238
+ self.model = self.model.to(DEVICE)
239
+ else:
240
+ # Already on CPU, try with reduced parameters
241
+ with torch.no_grad():
242
+ output = self.model.generate(
243
+ **inputs,
244
+ max_new_tokens=min(256, MAX_NEW_TOKENS), # Reduce token count
245
+ temperature=0.5, # Lower temperature
246
+ do_sample=False, # No sampling
247
+ num_return_sequences=1,
248
+ pad_token_id=self.tokenizer.eos_token_id
249
+ )
250
+ decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=True)
251
+ else:
252
+ # Re-raise non-CUDA errors
253
+ raise
254
+
255
+ # Extract only the generated part (remove prompt)
256
+ prompt_text = self.tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)
257
+ generated_text = decoded_output[len(prompt_text):].strip()
258
+ outputs.append(generated_text)
259
+
260
+ # Clear CUDA cache between samples
261
+ if torch.cuda.is_available():
262
+ torch.cuda.empty_cache()
263
+
264
+ except Exception as e:
265
+ error_msg = f"Error generating sample {sample_idx+1}: {str(e)}"
266
+ print(error_msg)
267
+ outputs.append(f"Error: {error_msg}")
268
+
269
+ return outputs
270
+
271
+ def parse_json_data(self, generated_text):
272
+ """Extract and parse JSON from generated text."""
273
+ try:
274
+ # Find JSON-like content (between [ and ])
275
+ start_idx = generated_text.find('[')
276
+ end_idx = generated_text.rfind(']') + 1
277
+
278
+ if start_idx >= 0 and end_idx > start_idx:
279
+ json_str = generated_text[start_idx:end_idx]
280
+ return json.loads(json_str)
281
+
282
+ # Try to find single object format
283
+ start_idx = generated_text.find('{')
284
+ end_idx = generated_text.rfind('}') + 1
285
+
286
+ if start_idx >= 0 and end_idx > start_idx:
287
+ json_str = generated_text[start_idx:end_idx]
288
+ return json.loads(json_str)
289
+
290
+ print(f"Could not find JSON content in: {generated_text}")
291
+ return None
292
+ except json.JSONDecodeError as e:
293
+ print(f"JSON parse error: {e}")
294
+ print(f"Problematic text: {generated_text}")
295
+
296
+ # Try to find and fix common JSON formatting errors
297
+ try:
298
+ # Replace single quotes with double quotes
299
+ json_str = generated_text[start_idx:end_idx].replace("'", "\"")
300
+ return json.loads(json_str)
301
+ except:
302
+ pass
303
+
304
+ # If still failing, try to extract individual JSON objects
305
+ try:
306
+ pattern = r'\{[^{}]*\}'
307
+ matches = re.findall(pattern, generated_text)
308
+ if matches:
309
+ results = []
310
+ for match in matches:
311
+ try:
312
+ # Replace single quotes with double quotes
313
+ fixed_match = match.replace("'", "\"")
314
+ obj = json.loads(fixed_match)
315
+ results.append(obj)
316
+ except:
317
+ continue
318
+ if results:
319
+ return results
320
+ except:
321
+ pass
322
+
323
+ return None
324
+
325
+ def generate_qa_from_pdf_chunk(self, chunk, num_questions=3, include_tags=True, difficulty_levels=True):
326
+ """Generate Q&A pairs from a PDF text chunk."""
327
+ if not self.model or not self.tokenizer:
328
+ return [], "Error: Model not loaded properly. Please try again with a different model."
329
+
330
+ if not chunk or len(chunk.strip()) < 100: # Skip very small chunks
331
+ return [], "Chunk too small to generate meaningful Q&A pairs."
332
+
333
+ prompt = self.generate_qa_prompt(chunk, num_questions, include_tags, difficulty_levels)
334
+ raw_outputs = self.generate_data(prompt, num_samples=1)
335
+ raw_output = raw_outputs[0]
336
+
337
+ parsed_data = self.parse_json_data(raw_output)
338
+
339
+ # Ensure parsed data is a list
340
+ if parsed_data and isinstance(parsed_data, dict):
341
+ parsed_data = [parsed_data]
342
+
343
+ # Return both the parsed data and raw output for debugging
344
+ return parsed_data, raw_output
345
+
346
+ def format_data_preview(data):
347
+ """Format the data for preview in the UI."""
348
+ if isinstance(data, list):
349
+ if len(data) > 0 and isinstance(data[0], dict):
350
+ # Convert list of dicts to DataFrame for better display
351
+ return pd.DataFrame(data).to_string()
352
+ else:
353
+ return json.dumps(data, indent=2)
354
+ elif isinstance(data, dict):
355
+ return json.dumps(data, indent=2)
356
+ else:
357
+ return str(data)
358
+
359
+ def save_data(data, format, filename_prefix):
360
+ """Save data to a file in the specified format."""
361
+ os.makedirs("synthetic_data", exist_ok=True)
362
+ timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
363
+ filename = f"synthetic_data/{filename_prefix}_{timestamp}"
364
+
365
+ if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
366
+ df = pd.DataFrame(data)
367
+
368
+ if format.lower() == "csv":
369
+ full_filename = f"{filename}.csv"
370
+ df.to_csv(full_filename, index=False)
371
+ elif format.lower() == "json":
372
+ full_filename = f"{filename}.json"
373
+ with open(full_filename, "w") as f:
374
+ json.dump(data, f, indent=2)
375
+ elif format.lower() == "excel":
376
+ full_filename = f"{filename}.xlsx"
377
+ df.to_excel(full_filename, index=False)
378
+ else:
379
+ full_filename = f"{filename}.txt"
380
+ with open(full_filename, "w") as f:
381
+ f.write(str(data))
382
+ else:
383
+ full_filename = f"{filename}.{format.lower()}"
384
+ with open(full_filename, "w") as f:
385
+ if format.lower() == "json":
386
+ json.dump(data, f, indent=2)
387
+ else:
388
+ f.write(str(data))
389
+
390
+ return full_filename
391
+
392
+ def load_models():
393
+ """Return a list of available models."""
394
+ return [
395
+ "tiiuae/falcon-7b-instruct"
396
+ ]
397
+
398
+ @spaces.GPU
399
+ def process_pdf_generate_qa(pdf_file, model_name, num_questions_per_chunk, include_tags, include_difficulty, output_file_format, progress=None):
400
+ """Process a PDF file and generate Q&A pairs from its content."""
401
+ if pdf_file is None:
402
+ return None, "Error: No PDF file uploaded", "", "No file provided"
403
+
404
+ try:
405
+ # Check RAM usage at start
406
+ current_ram_usage = get_process_memory_usage()
407
+ print(f"Starting RAM usage: {current_ram_usage:.2f}GB")
408
+
409
+ # Clear CUDA cache before starting
410
+ if torch.cuda.is_available():
411
+ torch.cuda.empty_cache()
412
+
413
+ # Initialize extractor and generator
414
+ extractor = PdfExtractor()
415
+ generator = SyntheticDataGenerator(model_name)
416
+
417
+ # Wrap model loading in try-except to handle errors
418
+ try:
419
+ load_success = generator.load_model()
420
+ if not load_success:
421
+ return None, "Error: Failed to load the model. Please try again with a different model.", "", "Model loading failed"
422
+ except Exception as e:
423
+ if "ZeroGPU" in str(e) or "GPU task aborted" in str(e) or "CUDA" in str(e):
424
+ print(f"GPU error during model loading: {e}. Trying with a smaller model...")
425
+ # If we get a ZeroGPU error, immediately try the smallest model
426
+ generator.model_name = "tiiuae/falcon-7b-instruct" # Use default model as fallback
427
+ load_success = generator.load_model()
428
+ if not load_success:
429
+ return None, "Error: Failed to load any model even after fallback. Please try again later.", "", "Model loading failed"
430
+ else:
431
+ # Re-raise other errors
432
+ raise
433
+
434
+ # Check RAM usage after model loading
435
+ ram_after_model = get_process_memory_usage()
436
+ print(f"RAM usage after model loading: {ram_after_model:.2f}GB")
437
+
438
+ # Save PDF temporarily if it's a file object
439
+ if hasattr(pdf_file, 'name'):
440
+ # It's already a file path
441
+ pdf_path = pdf_file.name
442
+ else:
443
+ # Create a temporary file
444
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
445
+ tmp.write(pdf_file)
446
+ pdf_path = tmp.name
447
+
448
+ # Extract text from PDF
449
+ pdf_text = extractor.extract_text_from_pdf(pdf_path)
450
+
451
+ if not pdf_text:
452
+ return None, "Failed to extract text from PDF", "", "No data generated"
453
+
454
+ # Clean and chunk the text - reduce chunk size to use less memory
455
+ cleaned_text = extractor.clean_text(pdf_text)
456
+ chunks = extractor.chunk_text(cleaned_text, max_chunk_size=400, overlap=30)
457
+
458
+ # Check RAM after PDF processing
459
+ ram_after_pdf = get_process_memory_usage()
460
+ print(f"RAM usage after PDF processing: {ram_after_pdf:.2f}GB, found {len(chunks)} chunks")
461
+
462
+ # If we're approaching the RAM limit already, reduce batch size
463
+ batch_size = 3 # Default
464
+ if ram_after_pdf > MAX_RAM_GB * 0.7: # If already using 70% of our limit
465
+ batch_size = 1 # Process one chunk at a time
466
+ print(f"High RAM usage detected ({ram_after_pdf:.2f}GB), reducing batch size to 1")
467
+ elif ram_after_pdf > MAX_RAM_GB * 0.5: # If using 50% of our limit
468
+ batch_size = 2 # Process two chunks at a time
469
+ print(f"Moderate RAM usage detected ({ram_after_pdf:.2f}GB), reducing batch size to 2")
470
+
471
+ # Generate Q&A pairs for each chunk
472
+ all_qa_pairs = []
473
+ all_raw_outputs = []
474
+
475
+ total_chunks = len(chunks)
476
+
477
+ # Process chunks in smaller batches to avoid memory buildup
478
+ for i in range(0, total_chunks, batch_size):
479
+ # Get the current batch of chunks
480
+ batch_chunks = chunks[i:min(i+batch_size, total_chunks)]
481
+
482
+ # Process each chunk in the batch
483
+ for j, chunk in enumerate(batch_chunks):
484
+ chunk_index = i + j
485
+
486
+ if progress is not None:
487
+ progress(chunk_index / total_chunks, f"Processing chunk {chunk_index+1}/{total_chunks}")
488
+
489
+ # Check if we're approaching RAM limit
490
+ current_ram = get_process_memory_usage()
491
+ if current_ram > MAX_RAM_GB * 0.9: # Over 90% of our limit
492
+ print(f"WARNING: High RAM usage detected: {current_ram:.2f}GB - force releasing memory")
493
+ import gc
494
+ gc.collect() # Force garbage collection
495
+ if torch.cuda.is_available():
496
+ torch.cuda.empty_cache()
497
+
498
+ # If still too high after garbage collection, abort batch processing
499
+ current_ram = get_process_memory_usage()
500
+ if current_ram > MAX_RAM_GB * 0.95: # Still dangerously high
501
+ print(f"CRITICAL: RAM usage too high ({current_ram:.2f}GB), stopping processing")
502
+ break
503
+
504
+ # Clear CUDA cache between chunks
505
+ if torch.cuda.is_available():
506
+ torch.cuda.empty_cache()
507
+
508
+ try:
509
+ qa_pairs, raw_output = generator.generate_qa_from_pdf_chunk(
510
+ chunk,
511
+ num_questions=num_questions_per_chunk,
512
+ include_tags=include_tags,
513
+ difficulty_levels=include_difficulty
514
+ )
515
+ except Exception as e:
516
+ error_type = str(e)
517
+ if "CUDA" in error_type or "GPU" in error_type or "ZeroGPU" in error_type:
518
+ print(f"GPU error during generation for chunk {chunk_index+1}: {e}")
519
+ # Fall back to CPU for this specific generation
520
+ raw_output = f"Error in chunk {chunk_index+1}: {str(e)}. Skipping..."
521
+ qa_pairs = None
522
+ elif "memory" in error_type.lower() or "ram" in error_type.lower():
523
+ print(f"Memory error processing chunk {chunk_index+1}: {e}")
524
+ # Force garbage collection and skip chunk
525
+ import gc
526
+ gc.collect()
527
+ if torch.cuda.is_available():
528
+ torch.cuda.empty_cache()
529
+ raw_output = f"Memory error in chunk {chunk_index+1}: {str(e)}. Skipping..."
530
+ qa_pairs = None
531
+ else:
532
+ # For other errors, just log and continue
533
+ print(f"Error processing chunk {chunk_index+1}: {e}")
534
+ raw_output = f"Error in chunk {chunk_index+1}: {str(e)}"
535
+ qa_pairs = None
536
+
537
+ if qa_pairs:
538
+ all_qa_pairs.extend(qa_pairs)
539
+ all_raw_outputs.append(raw_output)
540
+
541
+ # Check RAM usage after processing this chunk
542
+ current_ram = get_process_memory_usage()
543
+ print(f"RAM after chunk {chunk_index+1}: {current_ram:.2f}GB")
544
+
545
+ # Do a thorough cleanup after each batch
546
+ if torch.cuda.is_available():
547
+ torch.cuda.empty_cache()
548
+
549
+ # Force garbage collection between batches
550
+ import gc
551
+ gc.collect()
552
+
553
+ # Check if we need to abort due to memory constraints
554
+ current_ram = get_process_memory_usage()
555
+ if current_ram > MAX_RAM_GB:
556
+ print(f"WARNING: Exceeding RAM limit ({current_ram:.2f}GB). Stopping further processing.")
557
+ if progress is not None:
558
+ progress(1.0, f"Stopped early due to high memory usage ({current_ram:.2f}GB)")
559
+ break
560
+
561
+ if progress is not None:
562
+ progress(1.0, "Finished processing")
563
+
564
+ # Final cache clear and garbage collection
565
+ if torch.cuda.is_available():
566
+ torch.cuda.empty_cache()
567
+ import gc
568
+ gc.collect()
569
+
570
+ if not all_qa_pairs:
571
+ return None, "Failed to generate Q&A pairs", "\n\n".join(all_raw_outputs), "No data generated"
572
+
573
+ # Save data to file
574
+ filename = save_data(
575
+ all_qa_pairs,
576
+ output_file_format,
577
+ "qa_dataset"
578
+ )
579
+
580
+ # Format for display
581
+ formatted_data = format_data_preview(all_qa_pairs)
582
+
583
+ # Final memory report
584
+ final_ram = get_process_memory_usage()
585
+ print(f"Final RAM usage: {final_ram:.2f}GB")
586
+
587
+ return all_qa_pairs, formatted_data, "\n\n".join(all_raw_outputs), f"Data saved to {filename}"
588
+ except Exception as e:
589
+ error_msg = f"Error processing PDF: {str(e)}"
590
+ print(error_msg)
591
+ import traceback
592
+ print(traceback.format_exc())
593
+ return None, error_msg, "", "Processing failed"
594
+
595
+ # Set up the Gradio interface
596
+ def create_interface():
597
+ with gr.Blocks(title="PDF Q&A Dataset Generator") as app:
598
+ gr.Markdown("# 📚 PDF Q&A Dataset Generator")
599
+ gr.Markdown("""
600
+ Generate question & answer datasets from PDF documents using instruction-tuned language models.
601
+ Perfect for creating educational resources, quiz materials, or training data for Q&A systems.
602
+ """)
603
+
604
+ with gr.Tabs() as tabs:
605
+ with gr.TabItem("Generate Q&A Dataset"):
606
+ with gr.Row():
607
+ with gr.Column(scale=1):
608
+ pdf_file = gr.File(
609
+ label="Upload PDF",
610
+ file_types=[".pdf"],
611
+ type="binary"
612
+ )
613
+
614
+ model_dropdown = gr.Dropdown(
615
+ choices=load_models(),
616
+ value=DEFAULT_MODEL,
617
+ label="Model"
618
+ )
619
+
620
+ num_questions = gr.Slider(
621
+ minimum=1,
622
+ maximum=5,
623
+ value=3,
624
+ step=1,
625
+ label="Questions per Section"
626
+ )
627
+
628
+ include_tags = gr.Checkbox(
629
+ value=True,
630
+ label="Include Tags"
631
+ )
632
+
633
+ include_difficulty = gr.Checkbox(
634
+ value=True,
635
+ label="Include Difficulty Levels"
636
+ )
637
+
638
+ output_file_format = gr.Radio(
639
+ choices=["json", "csv", "excel"],
640
+ value="json",
641
+ label="Save File Format"
642
+ )
643
+
644
+ generate_btn = gr.Button("Generate Q&A Dataset", variant="primary")
645
+
646
+ progress_bar = gr.Progress()
647
+
648
+ with gr.Column(scale=2):
649
+ with gr.Tab("Parsed Data"):
650
+ parsed_data_output = gr.JSON(label="Generated Q&A Pairs")
651
+ formatted_data_output = gr.Textbox(
652
+ label="Formatted Preview",
653
+ lines=15
654
+ )
655
+
656
+ with gr.Tab("Raw Output"):
657
+ raw_output = gr.Textbox(
658
+ label="Raw Model Output",
659
+ lines=15
660
+ )
661
+
662
+ file_output = gr.Textbox(label="File Output")
663
+
664
+ with gr.TabItem("Documentation"):
665
+ gr.Markdown("""
666
+ ## How to Use
667
+
668
+ 1. **Upload a PDF**: Select a PDF document containing the content you want to generate questions from.
669
+ 2. **Select a model**: Choose an instruction-tuned language model from the dropdown.
670
+ 3. **Configure settings**:
671
+ - Set the number of questions to generate per text section
672
+ - Choose whether to include tags and difficulty levels
673
+ - Select your preferred output file format
674
+ 4. **Generate dataset**: Click the "Generate Q&A Dataset" button to create your dataset.
675
+
676
+ ## About This App
677
+
678
+ This app uses instruction-tuned language models to generate question and answer pairs from PDF documents. It:
679
+
680
+ 1. Extracts text from the uploaded PDF
681
+ 2. Splits the text into manageable chunks
682
+ 3. Generates questions, answers, tags, and difficulty levels for each chunk
683
+ 4. Combines all Q&A pairs into a comprehensive dataset
684
+
685
+ ### Features:
686
+ - Automatic text extraction from PDFs
687
+ - Smart text chunking to maintain context
688
+ - Customizable number of questions per chunk
689
+ - Optional tagging and difficulty classification
690
+ - Multiple output formats (JSON, CSV, Excel)
691
+
692
+ ### Use Cases:
693
+ - Create educational resources and quiz materials
694
+ - Generate training data for Q&A systems
695
+ - Build flashcard datasets for studying
696
+ - Develop content for educational applications
697
+ """)
698
+
699
+ with gr.TabItem("Status"):
700
+ gr.Markdown("""
701
+ ## System Status
702
+
703
+ This app may run in CPU mode, and a 7B-parameter model such as `tiiuae/falcon-7b-instruct` can be slow to load and to generate content.
704
+ If generation fails or seems stuck, give the model time to finish loading before retrying.
705
+
706
+ ### Troubleshooting
707
+
708
+ - If the app seems unresponsive after clicking "Generate", please be patient - model loading may take time.
709
+ - If you get an error about model loading, try refreshing the page and running the generation again.
710
+ - Not all PDFs can be properly processed - if text extraction fails, try with a different PDF.
711
+ """)
712
+
713
+ # Event handler for generate button
714
+ generate_btn.click(
715
+ process_pdf_generate_qa,
716
+ inputs=[
717
+ pdf_file,
718
+ model_dropdown,
719
+ num_questions,
720
+ include_tags,
721
+ include_difficulty,
722
+ output_file_format
723
+ ],
724
+ outputs=[parsed_data_output, formatted_data_output, raw_output, file_output],
725
+ show_progress=True
726
+ )
727
+
728
+ return app
729
+
730
+ # Export the app for Hugging Face Spaces
731
+ app = create_interface()
732
+
733
+ # Launch the app depending on the environment
734
+ if __name__ == "__main__":
735
+ app.launch()
736
+
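
The refactored module can also be exercised headlessly, which is handy for smoke-testing this commit before running the Space. The sketch below is illustrative only, not part of the commit: it assumes app.py is importable from the working directory with its dependencies (gradio, spaces, transformers, torch, pypdf, psutil) installed, that a hypothetical sample.pdf exists locally, and that the falcon-7b-instruct weights can be downloaded (set HF_TOKEN if the model or network requires it).

# Minimal, headless smoke test for the pipeline defined in app.py (a sketch under the assumptions above).
from app import PdfExtractor, SyntheticDataGenerator, format_data_preview, save_data

pdf_path = "sample.pdf"  # hypothetical local document

extractor = PdfExtractor()
raw_text = extractor.extract_text_from_pdf(pdf_path)
if raw_text:
    chunks = extractor.chunk_text(extractor.clean_text(raw_text), max_chunk_size=400, overlap=30)

    # In this revision the model is loaded eagerly inside __init__.
    generator = SyntheticDataGenerator()

    qa_pairs = []
    for chunk in chunks[:2]:  # a couple of chunks is enough for a quick check
        parsed, raw = generator.generate_qa_from_pdf_chunk(chunk, num_questions=2)
        if parsed:
            qa_pairs.extend(parsed)

    print(format_data_preview(qa_pairs))
    if qa_pairs:
        print("Saved to:", save_data(qa_pairs, "json", "qa_smoke_test"))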