HebaElshimy committed on
Commit 1621c4e · verified · 1 Parent(s): fee9667

Upload 2 files

Files changed (1):
  1. app.py +621 -564

app.py CHANGED
@@ -1,469 +1,357 @@
  import gradio as gr
  import pandas as pd
- import numpy as np
  import torch
- from transformers import (
-     pipeline,
-     AutoTokenizer,
-     AutoModel,
-     AutoModelForSequenceClassification
- )
  from sentence_transformers import SentenceTransformer, CrossEncoder
  import re
- from typing import List, Dict, Tuple, Optional
- import warnings
- warnings.filterwarnings('ignore')

  # ============================================================================
- # ADVANCED MODEL INITIALIZATION
  # ============================================================================

- class AdvancedMedicalScreener:
-     def __init__(self):
-         """Initialize all advanced NLP models for medical literature screening"""
-         print("🚀 Initializing Advanced Medical Screening Models...")
-
-         # 1. Biomedical language model for embeddings
-         print("Loading PubMedBERT for medical text understanding...")
-         self.pubmed_tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
-         self.pubmed_model = AutoModel.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
-
-         # 2. Cross-encoder for accurate semantic similarity
-         print("Loading Cross-Encoder for semantic matching...")
-         self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', max_length=512)
-
-         # 3. Zero-shot classifier for criteria matching
-         print("Loading Zero-Shot Classifier...")
-         self.zero_shot = pipeline(
-             "zero-shot-classification",
-             model="facebook/bart-large-mnli",
-             device=0 if torch.cuda.is_available() else -1
-         )
-
-         # 4. Sentence transformer for fast similarity
-         print("Loading Sentence Transformer...")
-         self.sentence_model = SentenceTransformer('pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb')
-
-         # 5. Medical NER for entity extraction (optional, lightweight)
-         print("Loading Medical NER model...")
-         try:
-             self.ner_pipeline = pipeline(
-                 "ner",
-                 model="dmis-lab/biobert-base-cased-v1.2",
-                 aggregation_strategy="simple"
-             )
-         except:
-             self.ner_pipeline = None
-             print("Note: Medical NER model not available, using fallback")
-
-         print("✅ All models loaded successfully!")
-
-         # Medical terminology expansions
-         self.medical_synonyms = {
-             'rct': ['randomized controlled trial', 'randomised controlled trial', 'randomized clinical trial'],
-             'pain': ['pain', 'nociception', 'analgesia', 'hyperalgesia', 'allodynia', 'neuropathic pain',
-                      'chronic pain', 'acute pain', 'postoperative pain', 'pain management'],
-             'surgery': ['surgery', 'surgical', 'operation', 'operative', 'postoperative', 'perioperative',
-                         'preoperative', 'surgical procedure', 'surgical intervention'],
-             'study design': ['study design', 'trial design', 'research design', 'methodology',
-                              'randomized', 'controlled', 'cohort', 'case-control', 'cross-sectional',
-                              'prospective', 'retrospective', 'observational', 'experimental'],
-             'systematic review': ['systematic review', 'meta-analysis', 'meta analysis', 'evidence synthesis'],
-             'case report': ['case report', 'case study', 'case series', 'case presentation'],
-             'clinical trial': ['clinical trial', 'clinical study', 'trial', 'intervention study'],
-         }
-
-         # Study design hierarchy for classification
-         self.study_designs = {
-             'high_quality': ['randomized controlled trial', 'systematic review', 'meta-analysis'],
-             'moderate_quality': ['cohort study', 'case-control study', 'controlled trial'],
-             'low_quality': ['case report', 'case series', 'opinion', 'editorial'],
-             'observational': ['cohort', 'case-control', 'cross-sectional', 'observational'],
-             'experimental': ['randomized', 'experimental', 'intervention', 'trial']
-         }
-
-     def get_pubmed_embedding(self, text: str) -> np.ndarray:
-         """Get PubMedBERT embedding for medical text"""
-         inputs = self.pubmed_tokenizer(
-             text,
-             return_tensors="pt",
-             truncation=True,
-             max_length=512,
-             padding=True
-         )
-
          with torch.no_grad():
-             outputs = self.pubmed_model(**inputs)
-             # Use CLS token embedding
              embedding = outputs.last_hidden_state[:, 0, :].numpy()
-
          return embedding.squeeze()

-     def expand_medical_terms(self, term: str) -> List[str]:
-         """Expand medical terms with synonyms and related concepts"""
-         term_lower = term.lower()
-         expanded = [term]
-
-         # Check for known medical synonyms
-         for key, synonyms in self.medical_synonyms.items():
-             if key in term_lower or any(syn in term_lower for syn in synonyms):
-                 expanded.extend(synonyms)
-
-         # Add variations
-         if 'pain' in term_lower:
-             expanded.extend(['analgesic', 'nociceptive', 'painful'])
-         if 'surgery' in term_lower or 'surgical' in term_lower:
-             expanded.extend(['surgeon', 'resection', 'excision', 'incision'])
-
-         return list(set(expanded))
-
-     def parse_advanced_criteria(self, criteria_text: str) -> Dict:
-         """Advanced parsing of inclusion/exclusion criteria with medical understanding"""
-         criteria = {
-             'population': [],
-             'intervention': [],
-             'comparator': [],
-             'outcomes': [],
-             'study_design': [],
-             'include_general': [],
-             'exclude_general': [],
-             'pain_related': [],
-             'surgery_related': []
-         }
-
-         lines = criteria_text.split('\n')
-         current_section = None
-         is_exclusion = False
-
-         for line in lines:
-             line_clean = line.strip()
-             line_lower = line_clean.lower()
-
-             if not line_clean:
-                 continue
-
-             # Detect exclusion context
-             if 'exclude' in line_lower:
-                 is_exclusion = True
-                 current_section = 'exclude_general'
-             elif 'include' in line_lower:
-                 is_exclusion = False
-                 current_section = 'include_general'
-
-             # Detect PICOS sections
-             elif any(term in line_lower for term in ['population:', 'participants:', 'patients:']):
-                 current_section = 'population'
-             elif any(term in line_lower for term in ['intervention:', 'exposure:', 'treatment:']):
-                 current_section = 'intervention'
-             elif any(term in line_lower for term in ['comparator:', 'control:', 'comparison:']):
-                 current_section = 'comparator'
-             elif any(term in line_lower for term in ['outcome:', 'endpoint:', 'measure:']):
-                 current_section = 'outcomes'
-             elif any(term in line_lower for term in ['study design:', 'design:', 'study type:', 'methodology:']):
-                 current_section = 'study_design'
-
-             # Special detection for pain and surgery
-             elif 'pain' in line_lower:
-                 current_section = 'pain_related'
-             elif any(term in line_lower for term in ['surgery', 'surgical', 'operation']):
-                 current_section = 'surgery_related'
-
-             # Extract criteria items
-             elif current_section:
-                 # Handle bullet points or dashes
-                 if line_clean.startswith(('-', '•', '*', '·')):
-                     item = line_clean[1:].strip()
-                     if item:
-                         # Expand medical terms
-                         expanded_items = self.expand_medical_terms(item)
-                         criteria[current_section].extend(expanded_items)
-                 # Handle comma-separated items
-                 elif ',' in line_clean and ':' not in line_clean:
-                     items = [i.strip() for i in line_clean.split(',')]
-                     for item in items:
-                         if item and len(item) > 2:
-                             expanded_items = self.expand_medical_terms(item)
-                             criteria[current_section].extend(expanded_items)
-                 # Handle single items
-                 elif line_clean and not any(marker in line_lower for marker in [':', 'population', 'intervention', 'outcome']):
-                     expanded_items = self.expand_medical_terms(line_clean)
-                     criteria[current_section].extend(expanded_items)
-
-         # Remove duplicates
-         for key in criteria:
-             criteria[key] = list(set(criteria[key]))
-
-         return criteria

-     def cross_encoder_score(self, text: str, criteria: str) -> float:
-         """Calculate cross-encoder similarity score"""
          try:
-             score = self.cross_encoder.predict([[text, criteria]])
-             # Normalize to 0-1 range
-             return float(1 / (1 + np.exp(-score[0])))
          except:
-             return 0.0
-
-     def zero_shot_classify(self, text: str, labels: List[str], hypothesis_template: str = "This study is about {}") -> Dict:
-         """Perform zero-shot classification with custom hypothesis"""
-         if not labels:
-             return {}
-
          try:
-             result = self.zero_shot(
-                 text,
-                 candidate_labels=labels,
-                 hypothesis_template=hypothesis_template,
-                 multi_label=True
-             )
-
-             # Convert to dictionary with scores
-             scores = {}
-             for label, score in zip(result['labels'], result['scores']):
-                 scores[label] = score
-             return scores
          except:
-             return {}
-
-     def evaluate_study_design(self, text: str) -> Dict:
-         """Evaluate study design quality and type"""
-         design_labels = [
-             'randomized controlled trial',
-             'systematic review',
-             'meta-analysis',
-             'cohort study',
-             'case-control study',
-             'cross-sectional study',
-             'case report',
-             'observational study',
-             'experimental study'
-         ]
-
-         scores = self.zero_shot_classify(
-             text,
-             design_labels,
-             hypothesis_template="This is a {}"
-         )
-
-         # Determine quality level
-         quality = 'unknown'
-         max_design = max(scores.items(), key=lambda x: x[1])[0] if scores else ''
-
-         for level, designs in self.study_designs.items():
-             if any(design in max_design.lower() for design in designs):
-                 quality = level
-                 break
-
-         return {
-             'design_scores': scores,
-             'primary_design': max_design,
-             'quality_level': quality
-         }
-
-     def evaluate_pain_surgery_relevance(self, text: str) -> Dict:
-         """Specifically evaluate pain and surgery relevance"""
-         # Pain-related evaluation
-         pain_terms = [
-             'chronic pain', 'acute pain', 'postoperative pain',
-             'pain management', 'analgesia', 'neuropathic pain',
-             'pain relief', 'pain control', 'pain assessment'
-         ]
-
-         pain_scores = self.zero_shot_classify(
-             text,
-             pain_terms,
-             hypothesis_template="This study involves {}"
-         )
-
-         # Surgery-related evaluation
-         surgery_terms = [
-             'surgical procedure', 'postoperative', 'perioperative',
-             'surgical intervention', 'operation', 'surgical outcomes',
-             'surgical complications', 'surgical technique'
-         ]
-
-         surgery_scores = self.zero_shot_classify(
-             text,
-             surgery_terms,
-             hypothesis_template="This study involves {}"
-         )
-
-         return {
-             'pain_relevance': max(pain_scores.values()) if pain_scores else 0,
-             'surgery_relevance': max(surgery_scores.values()) if surgery_scores else 0,
-             'pain_terms': pain_scores,
-             'surgery_terms': surgery_scores
-         }
-
-     def stage1_advanced_classification(self, title: str, abstract: str, criteria_text: str) -> Dict:
-         """Advanced Stage 1 classification using multiple NLP models"""
-
-         # Combine text
-         study_text = f"{title} {abstract}"
-         if len(study_text.strip()) < 20:
-             return {
-                 'decision': 'UNCLEAR',
-                 'confidence': 0,
-                 'reasoning': 'Insufficient text for analysis',
-                 'detailed_scores': {}
-             }
-
-         # Parse criteria with medical understanding
-         criteria = self.parse_advanced_criteria(criteria_text)
-
-         # Initialize scoring components
-         scores = {
-             'population': 0,
-             'intervention': 0,
-             'comparator': 0,
-             'outcomes': 0,
-             'study_design': 0,
-             'inclusion': 0,
-             'exclusion': 0,
-             'pain_relevance': 0,
-             'surgery_relevance': 0
-         }
-
-         reasoning_parts = []
-
-         # 1. Evaluate PICOS elements using cross-encoder
-         for element in ['population', 'intervention', 'comparator', 'outcomes']:
-             if criteria[element]:
-                 element_scores = []
-                 for criterion in criteria[element][:5]:  # Limit to top 5 to avoid overload
-                     score = self.cross_encoder_score(study_text, criterion)
-                     element_scores.append(score)
-
-                 if element_scores:
-                     scores[element] = max(element_scores)
-                     if scores[element] > 0.5:
-                         best_match = criteria[element][element_scores.index(max(element_scores))]
-                         reasoning_parts.append(f"{element.capitalize()}: '{best_match}' ({scores[element]:.2f})")
-
-         # 2. Evaluate study design
-         design_eval = self.evaluate_study_design(study_text)
-         scores['study_design'] = max(design_eval['design_scores'].values()) if design_eval['design_scores'] else 0
-         if scores['study_design'] > 0.5:
-             reasoning_parts.append(f"Study Design: {design_eval['primary_design']} ({scores['study_design']:.2f})")
-
-         # 3. Evaluate pain and surgery relevance if applicable
-         if criteria['pain_related'] or 'pain' in criteria_text.lower():
-             pain_surgery_eval = self.evaluate_pain_surgery_relevance(study_text)
-             scores['pain_relevance'] = pain_surgery_eval['pain_relevance']
-             if scores['pain_relevance'] > 0.5:
-                 reasoning_parts.append(f"Pain Relevance: {scores['pain_relevance']:.2f}")
-
-         if criteria['surgery_related'] or 'surgery' in criteria_text.lower():
-             pain_surgery_eval = self.evaluate_pain_surgery_relevance(study_text)
-             scores['surgery_relevance'] = pain_surgery_eval['surgery_relevance']
-             if scores['surgery_relevance'] > 0.5:
-                 reasoning_parts.append(f"Surgery Relevance: {scores['surgery_relevance']:.2f}")
-
-         # 4. Evaluate inclusion criteria
-         if criteria['include_general']:
-             inclusion_scores = []
-             for criterion in criteria['include_general'][:3]:
-                 score = self.cross_encoder_score(study_text, criterion)
-                 inclusion_scores.append(score)
-             scores['inclusion'] = max(inclusion_scores) if inclusion_scores else 0
-             if scores['inclusion'] > 0.5:
-                 reasoning_parts.append(f"Inclusion Match: {scores['inclusion']:.2f}")
-
-         # 5. Evaluate exclusion criteria
-         if criteria['exclude_general']:
-             exclusion_scores = []
-             for criterion in criteria['exclude_general'][:3]:
-                 score = self.cross_encoder_score(study_text, criterion)
-                 exclusion_scores.append(score)
-             scores['exclusion'] = max(exclusion_scores) if exclusion_scores else 0
-             if scores['exclusion'] > 0.6:
-                 reasoning_parts.append(f"EXCLUSION Match: {scores['exclusion']:.2f}")
-
-         # 6. Check for low-quality study designs
-         if design_eval.get('quality_level') == 'low_quality':
-             scores['exclusion'] = max(scores['exclusion'], 0.7)
-             reasoning_parts.append(f"Low Quality Design: {design_eval['primary_design']}")
-
-         # Decision Logic with Confidence Calibration
-         decision, confidence = self._make_decision_stage1(scores, design_eval)
-
-         # Format reasoning
-         if not reasoning_parts:
-             reasoning_parts.append("No strong matches found")
-         reasoning = f"Stage 1 {decision}: {'; '.join(reasoning_parts)}"
-
-         return {
-             'decision': decision,
-             'confidence': confidence,
-             'reasoning': reasoning,
-             'detailed_scores': scores,
-             'study_design': design_eval.get('primary_design', 'Unknown'),
-             'quality_level': design_eval.get('quality_level', 'Unknown')
-         }
-
-     def _make_decision_stage1(self, scores: Dict, design_eval: Dict) -> Tuple[str, int]:
-         """Make Stage 1 decision based on scores with calibrated confidence"""
-
-         # Strong exclusion criteria
-         if scores['exclusion'] > 0.65:
-             confidence = min(int(scores['exclusion'] * 100), 90)
-             return 'EXCLUDE', confidence
-
-         # Low quality design exclusion
-         if design_eval.get('quality_level') == 'low_quality' and scores['study_design'] > 0.7:
-             return 'EXCLUDE', 75
-
-         # Calculate inclusion strength
-         picos_scores = [scores['population'], scores['intervention'], scores['outcomes']]
-         relevant_picos = sum(1 for s in picos_scores if s > 0.5)
-         avg_picos = np.mean([s for s in picos_scores if s > 0.3]) if any(s > 0.3 for s in picos_scores) else 0
-
-         # Strong inclusion - multiple PICOS matches
-         if relevant_picos >= 2 and avg_picos > 0.6:
-             confidence = min(int(avg_picos * 85), 85)
-             return 'INCLUDE', confidence
-
-         # Moderate inclusion - some relevant matches
-         if relevant_picos >= 1 or scores['inclusion'] > 0.6:
-             best_score = max(scores['population'], scores['intervention'], scores['outcomes'], scores['inclusion'])
-             confidence = min(int(best_score * 75), 75)
-             return 'INCLUDE', confidence
-
-         # Special consideration for pain/surgery studies
-         if (scores['pain_relevance'] > 0.6 or scores['surgery_relevance'] > 0.6) and \
-            design_eval.get('quality_level') in ['high_quality', 'moderate_quality']:
-             confidence = 70
-             return 'INCLUDE', confidence
-
-         # Weak matches - need manual review
-         if any(s > 0.4 for s in [scores['population'], scores['intervention'], scores['outcomes']]):
-             return 'UNCLEAR', 50
-
-         # No relevant matches
-         return 'EXCLUDE', 60

  # ============================================================================
- # GRADIO INTERFACE FUNCTIONS
  # ============================================================================

- # Initialize the screener globally
- screener = None

- def initialize_screener():
-     """Initialize the screener if not already done"""
-     global screener
-     if screener is None:
-         screener = AdvancedMedicalScreener()
-     return screener

- def process_stage1_advanced(file, title_col, abstract_col, criteria, sample_size):
-     """Process Stage 1 screening with advanced NLP models"""
      try:
-         # Initialize screener
-         model = initialize_screener()
-
-         # Read CSV
          df = pd.read_csv(file.name)
          if sample_size < len(df):
              df = df.head(sample_size)
@@ -476,16 +364,13 @@ def process_stage1_advanced(file, title_col, abstract_col, criteria, sample_size
              if not title and not abstract:
                  continue

-             # Use advanced classification
-             classification = model.stage1_advanced_classification(title, abstract, criteria)

              result = {
                  'Study_ID': idx + 1,
                  'Title': title[:100] + "..." if len(title) > 100 else title,
                  'Stage1_Decision': classification['decision'],
                  'Stage1_Confidence': f"{classification['confidence']}%",
-                 'Study_Design': classification.get('study_design', 'Unknown'),
-                 'Quality_Level': classification.get('quality_level', 'Unknown'),
                  'Stage1_Reasoning': classification['reasoning'],
                  'Ready_for_Stage2': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
                  'Full_Title': title,
@@ -495,38 +380,29 @@ def process_stage1_advanced(file, title_col, abstract_col, criteria, sample_size
          results_df = pd.DataFrame(results)

-         # Generate summary
          total = len(results_df)
          included = len(results_df[results_df['Stage1_Decision'] == 'INCLUDE'])
          excluded = len(results_df[results_df['Stage1_Decision'] == 'EXCLUDE'])
          unclear = len(results_df[results_df['Stage1_Decision'] == 'UNCLEAR'])

-         # Quality breakdown
-         quality_counts = results_df['Quality_Level'].value_counts().to_dict()
-         quality_summary = "\n".join([f" - {level}: {count}" for level, count in quality_counts.items()])

          summary = f"""
- ## 📊 Advanced Stage 1 Results (AI-Powered Medical Screening)
-
- **Screening Complete with Advanced NLP Models:**
- - **Total Studies Analyzed:** {total}
- - **✅ Include for Stage 2:** {included} ({included/total*100:.1f}%)
- - **❌ Exclude:** {excluded} ({excluded/total*100:.1f}%)
- - **⚠️ Needs Manual Review:** {unclear} ({unclear/total*100:.1f}%)

- **Study Quality Distribution:**
- {quality_summary}

- **Models Used:**
- - PubMedBERT for medical text understanding
- - Cross-encoder for semantic similarity
- - Zero-shot classification for criteria matching
- - Medical NER for entity extraction

  **Next Steps:**
  1. Review {unclear} studies marked as UNCLEAR
  2. Proceed to Stage 2 with {included} included studies
- 3. Consider manual validation of borderline cases
  """

          return summary, results_df, results_df.to_csv(index=False)
@@ -534,28 +410,103 @@ def process_stage1_advanced(file, title_col, abstract_col, criteria, sample_size
      except Exception as e:
          return f"Error: {str(e)}", None, ""

- def create_advanced_interface():
-     """Create the Gradio interface with advanced NLP capabilities"""
-     with gr.Blocks(title="🔬 Advanced Medical Literature Screening", theme=gr.themes.Soft()) as interface:

-         gr.Markdown("""
- # 🔬 Advanced Medical Literature Screening with AI

- **State-of-the-art NLP models for systematic review screening**

- This tool uses advanced transformer models specifically trained on medical literature:
- - **PubMedBERT**: Understands medical terminology and concepts
- - **Cross-Encoders**: Accurate semantic matching for criteria
- - **Zero-Shot Classification**: Flexible criteria evaluation
- - **Medical NER**: Extracts medical entities automatically

- Optimized for **pain**, **surgery**, and **study design** criteria, with general medical understanding.
          """)

          with gr.Tabs():

              # STAGE 1 TAB
-             with gr.TabItem("📋 Stage 1: Advanced Title/Abstract Screening"):
                  with gr.Row():
                      with gr.Column(scale=1):
                          gr.Markdown("### 📁 Upload Study Data")
@@ -570,113 +521,200 @@ def create_advanced_interface():
                          stage1_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
                          stage1_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)

-                         stage1_sample = gr.Slider(
-                             label="Studies to Process",
-                             minimum=5,
-                             maximum=500,
-                             value=100,
-                             step=5,
-                             info="Processing time increases with more studies"
-                         )

                      with gr.Column(scale=1):
-                         gr.Markdown("### 🎯 Inclusion/Exclusion Criteria")

                          stage1_criteria = gr.Textbox(
-                             label="Enter your criteria (understands medical terminology)",
                              value="""POPULATION:
- - Adult patients
- - Chronic pain patients
- - Surgical patients

  INTERVENTION:
- - Pain management interventions
- - Surgical procedures
- - Analgesic treatments

  OUTCOMES:
- - Pain intensity
- - Pain relief
- - Functional outcomes
- - Quality of life

  STUDY DESIGN:
  - Randomized controlled trials
- - Systematic reviews
  - Cohort studies
- - NOT case reports

  EXCLUDE:
  - Animal studies
- - Pediatric only
  - Case reports
- - Editorials""",
-                             lines=20,
-                             info="The AI understands medical synonyms and related terms"
                          )

-                 with gr.Row():
-                     stage1_process_btn = gr.Button(
-                         "🚀 Start Advanced AI Screening",
-                         variant="primary",
-                         scale=2
-                     )
-                     gr.Markdown("*First run may take longer to load models*", scale=1)

                  stage1_results = gr.Markdown()
-                 stage1_table = gr.Dataframe(
-                     label="Stage 1 Results with Quality Assessment",
-                     wrap=True
-                 )
                  stage1_download_data = gr.Textbox(visible=False)
-                 stage1_download_btn = gr.DownloadButton(
-                     label="💾 Download Stage 1 Results",
-                     visible=False
-                 )

-             # HELP TAB
-             with gr.TabItem("❓ Help & Guidelines"):
                  gr.Markdown("""
- ## 🤖 Advanced Features Explained

- ### **Medical Understanding**
- The system automatically:
- - Recognizes medical synonyms (e.g., RCT = randomized controlled trial)
- - Understands pain-related terms (nociception, analgesia, hyperalgesia)
- - Identifies surgical concepts (perioperative, postoperative, resection)
- - Evaluates study quality based on design

- ### **How to Write Effective Criteria**

- 1. **Be specific but comprehensive:**
-    - ✅ "chronic pain lasting > 3 months"
-    - ✅ "postoperative pain management"
-    - ❌ "pain" (too vague)

- 2. **Use medical terms freely:**
-    - The AI understands medical terminology
-    - It will automatically expand terms with synonyms
-    - Example: "surgery" → surgical, operation, resection, etc.

- 3. **Specify study designs clearly:**
-    - High quality: RCT, systematic review, meta-analysis
-    - Moderate: cohort, case-control
-    - Low: case reports, opinions

- ### **Confidence Scores**
- - **80-100%**: Strong match, high confidence
- - **60-79%**: Good match, moderate confidence
- - **40-59%**: Weak match, needs review
- - **0-39%**: Poor match, likely exclude

- ### **Tips for Best Results**
- - Include both inclusion AND exclusion criteria
- - Specify population, intervention, and outcomes
- - Mention specific study designs to include/exclude
- - The AI works best with complete abstracts
                  """)

-         # Event handlers
          def update_stage1_columns(file):
              if file is None:
                  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
@@ -689,31 +727,50 @@ EXCLUDE:
              except:
                  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])

-         stage1_file.change(
-             fn=update_stage1_columns,
-             inputs=[stage1_file],
-             outputs=[stage1_title_col, stage1_abstract_col]
-         )

-         def process_with_download(*args):
-             summary, table, csv_data = process_stage1_advanced(*args)
              return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))

          stage1_process_btn.click(
-             fn=process_with_download,
              inputs=[stage1_file, stage1_title_col, stage1_abstract_col, stage1_criteria, stage1_sample],
              outputs=[stage1_results, stage1_table, stage1_download_data, stage1_download_btn]
          )

-         stage1_download_btn.click(
-             lambda data: data,
-             inputs=[stage1_download_data],
-             outputs=[gr.File()]
          )

      return interface

  if __name__ == "__main__":
-     print("Starting Advanced Medical Literature Screening System...")
-     interface = create_advanced_interface()
      interface.launch()

  import gradio as gr
  import pandas as pd
+ import requests
+ import json
+ from transformers import pipeline, AutoTokenizer, AutoModel
  import torch

  from sentence_transformers import SentenceTransformer, CrossEncoder
+ import time
+ from typing import List, Dict, Tuple
  import re
+ import numpy as np

  # ============================================================================
+ # ADVANCED NLP MODELS INITIALIZATION
  # ============================================================================

+ print("Loading advanced models...")
+
+ # Initialize advanced models
+ try:
+     # Cross-encoder for accurate semantic similarity
+     cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', max_length=512)
+
+     # Zero-shot classifier for criteria matching
+     classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+
+     # Medical sentence transformer
+     sentence_model = SentenceTransformer('pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb')
+
+     # PubMedBERT for medical text understanding
+     pubmed_tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
+     pubmed_model = AutoModel.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
+
+     print("Advanced models loaded successfully!")
+     USE_ADVANCED_MODELS = True
+ except Exception as e:
+     print(f"Warning: Could not load advanced models, falling back to basic models. Error: {e}")
+     # Fallback to basic models
+     classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+     similarity_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
+     USE_ADVANCED_MODELS = False
+     print("Basic models loaded successfully!")
+
+ # Medical terminology expansions
+ MEDICAL_SYNONYMS = {
+     'rct': ['randomized controlled trial', 'randomised controlled trial', 'randomized clinical trial'],
+     'pain': ['pain', 'nociception', 'analgesia', 'hyperalgesia', 'allodynia', 'neuropathic pain',
+              'chronic pain', 'acute pain', 'postoperative pain', 'pain management'],
+     'surgery': ['surgery', 'surgical', 'operation', 'operative', 'postoperative', 'perioperative',
+                 'preoperative', 'surgical procedure', 'surgical intervention'],
+     'study design': ['study design', 'trial design', 'research design', 'methodology',
+                      'randomized', 'controlled', 'cohort', 'case-control', 'cross-sectional'],
+ }
+
+ # ============================================================================
+ # ADVANCED NLP FUNCTIONS
+ # ============================================================================
+
+ def expand_medical_terms(term: str) -> List[str]:
+     """Expand medical terms with synonyms"""
+     term_lower = term.lower()
+     expanded = [term]
+
+     for key, synonyms in MEDICAL_SYNONYMS.items():
+         if key in term_lower or any(syn in term_lower for syn in synonyms):
+             expanded.extend(synonyms[:3])  # Limit expansion
+
+     return list(set(expanded))
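Note: given the MEDICAL_SYNONYMS table above, a quick smoke test of the expansion (interactive session; output order varies because of the set()):

    >>> sorted(expand_medical_terms("postoperative pain"))
    # 'pain' matches the term directly and 'postoperative' matches a 'surgery'
    # synonym, so the first three synonyms of each key are added:
    ['analgesia', 'nociception', 'operation', 'pain', 'postoperative pain', 'surgery', 'surgical']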
+
+ def cross_encoder_score(text: str, criteria: str) -> float:
+     """Calculate cross-encoder similarity score"""
+     if not USE_ADVANCED_MODELS:
+         return 0.5  # Default score if not available
+     try:
+         score = cross_encoder.predict([[text, criteria]])
+         return float(1 / (1 + np.exp(-score[0])))
+     except:
+         return 0.5
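Note: the ms-marco cross-encoder emits an unbounded relevance logit, and the function above squashes it into [0, 1] with a sigmoid. A minimal sketch reusing the cross_encoder loaded earlier in this file (the texts are illustrative):

    logit = cross_encoder.predict([["postoperative analgesia after knee surgery",
                                    "pain management interventions"]])[0]
    prob = 1 / (1 + np.exp(-logit))  # a logit of 2.0 maps to ~0.88, -2.0 to ~0.12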
+
+ def get_pubmed_embedding(text: str) -> np.ndarray:
+     """Get PubMedBERT embedding for medical text"""
+     if not USE_ADVANCED_MODELS:
+         return np.zeros(768)
+
+     try:
+         inputs = pubmed_tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
          with torch.no_grad():
+             outputs = pubmed_model(**inputs)
          embedding = outputs.last_hidden_state[:, 0, :].numpy()
          return embedding.squeeze()
+     except:
+         return np.zeros(768)
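Note: the function pools the [CLS] token of the final hidden layer, so each text maps to a single 768-dim vector (hence the np.zeros(768) fallback). A usage sketch, reusing the cosine_similarity helper defined further down in this file; the sentences are made up:

    vec = get_pubmed_embedding("Randomized trial of epidural analgesia after hip surgery")
    print(vec.shape)  # (768,) - the hidden size of PubMedBERT-base
    other = get_pubmed_embedding("RCT of epidural pain relief following hip replacement")
    print(cosine_similarity(vec, other))  # closer to 1 for related medical texts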

+ def zero_shot_classify(text: str, labels: List[str], hypothesis_template: str = "This study is about {}") -> Dict:
+     """Perform zero-shot classification"""
+     if not labels:
+         return {}
+
+     try:
+         result = classifier(text, candidate_labels=labels[:10], hypothesis_template=hypothesis_template, multi_label=True)
+         scores = {}
+         for label, score in zip(result['labels'], result['scores']):
+             scores[label] = score
+         return scores
+     except:
+         return {}
+
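Note: candidate_labels is capped at ten because BART-MNLI runs a separate entailment pass per label against the hypothesis template. A minimal call, with invented example text and illustrative scores:

    scores = zero_shot_classify(
        "A randomized controlled trial of gabapentin for postoperative pain",
        ["pain management", "oncology", "cardiology"],
    )
    # multi_label=True scores each label independently, e.g.
    # {'pain management': 0.93, 'oncology': 0.08, 'cardiology': 0.05}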
+ # ============================================================================
+ # ENHANCED CRITERIA PARSING
+ # ============================================================================
+
+ def parse_criteria(criteria_text: str, stage: str = "stage1") -> Dict:
+     """Parse criteria with medical term expansion"""
+     criteria = {
+         'population': [], 'intervention': [], 'comparator': [], 'outcomes': [],
+         'study_design': [], 'include_general': [], 'exclude_general': []
+     }
+
+     lines = criteria_text.lower().split('\n')
+     current_section = None
+
+     for line in lines:
+         line = line.strip()
+         if not line:
+             continue
+
+         # Detect section headers
+         if any(keyword in line for keyword in ['population:', 'participants:', 'subjects:']):
+             current_section = 'population'
+         elif any(keyword in line for keyword in ['intervention:', 'exposure:', 'treatment:']):
+             current_section = 'intervention'
+         elif any(keyword in line for keyword in ['comparator:', 'control:', 'comparison:']):
+             current_section = 'comparator'
+         elif any(keyword in line for keyword in ['outcomes:', 'endpoint:', 'results:']):
+             current_section = 'outcomes'
+         elif any(keyword in line for keyword in ['study design:', 'design:', 'study type:']):
+             current_section = 'study_design'
+         elif 'include' in line and ':' in line:
+             current_section = 'include_general'
+         elif 'exclude' in line and ':' in line:
+             current_section = 'exclude_general'
+         elif line.startswith('-') and current_section:
+             term = line[1:].strip()
+             if term and len(term) > 2:
+                 # Expand medical terms if advanced models are available
+                 if USE_ADVANCED_MODELS:
+                     expanded = expand_medical_terms(term)
+                     criteria[current_section].extend(expanded)
+                 else:
+                     criteria[current_section].append(term)
+         elif current_section and not any(keyword in line for keyword in ['include', 'exclude', 'population', 'intervention', 'comparator', 'outcomes', 'study']):
+             terms = [t.strip() for t in line.split(',') if t.strip() and len(t.strip()) > 2]
+             if USE_ADVANCED_MODELS:
+                 for term in terms:
+                     expanded = expand_medical_terms(term)
+                     criteria[current_section].extend(expanded)
+             else:
+                 criteria[current_section].extend(terms)
+
+     # Remove duplicates
+     for key in criteria:
+         criteria[key] = list(set(criteria[key]))
+
+     return criteria
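Note: tracing parse_criteria on a small input (the text is lowercased first; bullet terms are additionally run through expand_medical_terms when advanced models loaded):

    text = "POPULATION:\n- Adult patients\n\nEXCLUDE:\n- Animal studies"
    criteria = parse_criteria(text)
    # criteria['population'] -> ['adult patients'] (no synonym key matches, so the
    # expansion adds nothing here); criteria['exclude_general'] -> ['animal studies'];
    # the remaining keys stay empty.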

+ # ============================================================================
+ # ENHANCED STAGE 1 CLASSIFICATION
+ # ============================================================================
+
+ def semantic_similarity_score(study_text: str, criteria_terms: List[str]) -> Tuple[float, str]:
+     """Calculate semantic similarity with advanced models if available"""
+     if not criteria_terms:
+         return 0.0, ""
+
+     best_score, best_match = 0.0, ""
+
+     if USE_ADVANCED_MODELS:
+         # Use cross-encoder for more accurate matching
+         for term in criteria_terms[:5]:  # Limit to avoid slowdown
+             score = cross_encoder_score(study_text, term)
+             if score > best_score:
+                 best_score, best_match = score, term
+     else:
+         # Fallback to basic embedding similarity
+         study_embedding = get_text_embedding(study_text)
+         for term in criteria_terms:
+             term_embedding = get_text_embedding(term)
+             similarity = cosine_similarity(study_embedding, term_embedding)
+             if similarity > best_score:
+                 best_score, best_match = similarity, term
+
+     return best_score, best_match
+
+ def cosine_similarity(a, b):
+     """Simple cosine similarity calculation"""
+     dot_product = np.dot(a, b)
+     norm_a = np.linalg.norm(a)
+     norm_b = np.linalg.norm(b)
+     return dot_product / (norm_a * norm_b) if norm_a > 0 and norm_b > 0 else 0
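Note: the zero-norm guard matters because the embedding fallbacks above return zero vectors, which would otherwise divide by zero. Worked numbers: for a = [1, 0] and b = [1, 1], dot = 1, |a| = 1, |b| = sqrt(2), so the similarity is 1/sqrt(2) ~= 0.707.

    print(cosine_similarity(np.array([1.0, 0.0]), np.array([1.0, 1.0])))  # ~0.7071
    print(cosine_similarity(np.zeros(2), np.array([1.0, 1.0])))           # 0 (guarded)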
+
+ def get_text_embedding(text):
+     """Get text embedding using the similarity model"""
+     if USE_ADVANCED_MODELS:
          try:
+             embedding = sentence_model.encode(text)
+             return embedding
          except:
+             return np.zeros(384)
+     else:
          try:
+             if 'similarity_model' in globals():
+                 embeddings = similarity_model(text)
+                 return np.mean(embeddings[0], axis=0)
+             else:
+                 return np.zeros(384)
          except:
+             return np.zeros(384)

+ def stage1_classification(title: str, abstract: str, criteria_text: str) -> Dict:
+     """Enhanced Stage 1 classification with advanced NLP when available"""
+
+     study_text = f"{title} {abstract}".lower()
+     if len(study_text.strip()) < 20:
+         return {'decision': 'UNCLEAR', 'confidence': 20, 'reasoning': 'Insufficient text', 'stage': 1}
+
+     criteria = parse_criteria(criteria_text, "stage1")
+
+     # Use zero-shot classification if available with advanced models
+     if USE_ADVANCED_MODELS and criteria['include_general']:
+         zs_scores = zero_shot_classify(
+             study_text,
+             criteria['include_general'][:5],
+             "This study is relevant to {}"
+         )
+         if zs_scores:
+             max_zs_score = max(zs_scores.values())
+             if max_zs_score > 0.7:
+                 return {
+                     'decision': 'INCLUDE',
+                     'confidence': min(int(max_zs_score * 100), 85),
+                     'reasoning': f"Stage 1 INCLUDE: High relevance to inclusion criteria ({max_zs_score:.2f})",
+                     'stage': 1
+                 }
+
+     # Calculate PICOS scores with appropriate thresholds
+     pop_score, pop_match = semantic_similarity_score(study_text, criteria['population'])
+     int_score, int_match = semantic_similarity_score(study_text, criteria['intervention'])
+     out_score, out_match = semantic_similarity_score(study_text, criteria['outcomes'])
+     design_score, design_match = semantic_similarity_score(study_text, criteria['study_design'])
+     inc_score, inc_match = semantic_similarity_score(study_text, criteria['include_general'])
+     exc_score, exc_match = semantic_similarity_score(study_text, criteria['exclude_general'])
+
+     # Adjust thresholds based on model availability
+     threshold = 0.4 if USE_ADVANCED_MODELS else 0.25
+
+     reasoning_parts = []
+     if pop_score > threshold: reasoning_parts.append(f"Population: '{pop_match}' ({pop_score:.2f})")
+     if int_score > threshold: reasoning_parts.append(f"Intervention: '{int_match}' ({int_score:.2f})")
+     if out_score > threshold: reasoning_parts.append(f"Outcome: '{out_match}' ({out_score:.2f})")
+     if design_score > threshold: reasoning_parts.append(f"Design: '{design_match}' ({design_score:.2f})")
+     if inc_score > threshold: reasoning_parts.append(f"Include: '{inc_match}' ({inc_score:.2f})")
+     if exc_score > threshold: reasoning_parts.append(f"Exclude: '{exc_match}' ({exc_score:.2f})")
+
+     # Decision Logic
+     exc_threshold = 0.5 if USE_ADVANCED_MODELS else 0.35
+     if exc_score > exc_threshold:
+         decision, confidence = 'EXCLUDE', min(int(exc_score * 100), 90)
+         reasoning = f"Stage 1 EXCLUDE: {'; '.join(reasoning_parts)}"
+     elif sum([pop_score > threshold, int_score > threshold, out_score > threshold]) >= 2 and USE_ADVANCED_MODELS:
+         avg_score = np.mean([s for s in [pop_score, int_score, out_score, design_score, inc_score] if s > threshold])
+         decision, confidence = 'INCLUDE', min(int(avg_score * 85), 85)
+         reasoning = f"Stage 1 INCLUDE (Advanced): {'; '.join(reasoning_parts)}"
+     elif sum([pop_score > 0.25, int_score > 0.25, out_score > 0.25]) >= 1:
+         avg_score = np.mean([s for s in [pop_score, int_score, out_score, design_score, inc_score] if s > 0.25])
+         decision, confidence = 'INCLUDE', min(int(avg_score * 75), 80)
+         reasoning = f"Stage 1 INCLUDE: {'; '.join(reasoning_parts)}"
+     else:
+         decision, confidence = 'UNCLEAR', 40
+         reasoning = f"Stage 1 UNCLEAR: {'; '.join(reasoning_parts) if reasoning_parts else 'No clear matches'}"
+
+     return {'decision': decision, 'confidence': confidence, 'reasoning': reasoning, 'stage': 1}
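Note: a worked example of the decision ladder with advanced models (threshold 0.4, exc_threshold 0.5). Suppose pop_score = 0.55, int_score = 0.60, out_score = 0.20 and the other scores are 0: two PICOS scores clear the threshold, so the second branch fires with avg_score = mean(0.55, 0.60) = 0.575 and confidence = min(int(0.575 * 85), 85) = 48, giving INCLUDE at 48%. These numbers are illustrative, not taken from a real run.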

  # ============================================================================
+ # STAGE 2 CLASSIFICATION (keeping original)
  # ============================================================================

+ def stage2_classification(title: str, abstract: str, full_text: str, criteria_text: str,
+                           data_extraction_fields: Dict = None) -> Dict:
+     """Stage 2: Detailed full-text screening with data extraction"""
+
+     # Combine all available text
+     study_text = f"{title} {abstract} {full_text}".lower()
+
+     if len(study_text.strip()) < 50:
+         return {'decision': 'UNCLEAR', 'confidence': 25, 'reasoning': 'Insufficient full text', 'stage': 2}
+
+     criteria = parse_criteria(criteria_text, "stage2")
+
+     # More stringent scoring for Stage 2
+     pop_score, pop_match = semantic_similarity_score(study_text, criteria['population'])
+     int_score, int_match = semantic_similarity_score(study_text, criteria['intervention'])
+     comp_score, comp_match = semantic_similarity_score(study_text, criteria['comparator'])
+     out_score, out_match = semantic_similarity_score(study_text, criteria['outcomes'])
+     design_score, design_match = semantic_similarity_score(study_text, criteria['study_design'])
+     exc_score, exc_match = semantic_similarity_score(study_text, criteria['exclude_general'])
+
+     # Data extraction scoring
+     extraction_scores = {}
+     if data_extraction_fields:
+         for field, terms in data_extraction_fields.items():
+             if terms:
+                 field_score, field_match = semantic_similarity_score(study_text, terms)
+                 extraction_scores[field] = {'score': field_score, 'match': field_match}
+
+     reasoning_parts = []
+     if pop_score > 0.3: reasoning_parts.append(f"Population: '{pop_match}' ({pop_score:.2f})")
+     if int_score > 0.3: reasoning_parts.append(f"Intervention: '{int_match}' ({int_score:.2f})")
+     if comp_score > 0.3: reasoning_parts.append(f"Comparator: '{comp_match}' ({comp_score:.2f})")
+     if out_score > 0.3: reasoning_parts.append(f"Outcome: '{out_match}' ({out_score:.2f})")
+     if design_score > 0.3: reasoning_parts.append(f"Design: '{design_match}' ({design_score:.2f})")
+     if exc_score > 0.3: reasoning_parts.append(f"Exclusion: '{exc_match}' ({exc_score:.2f})")
+
+     # Stage 2 Decision Logic (High Specificity)
+     if exc_score > 0.4:
+         decision, confidence = 'EXCLUDE', min(int(exc_score * 100), 95)
+         reasoning = f"Stage 2 EXCLUDE: {'; '.join(reasoning_parts)}"
+     elif sum([pop_score > 0.4, int_score > 0.4, out_score > 0.4, design_score > 0.4]) >= 3:
+         avg_score = np.mean([pop_score, int_score, comp_score, out_score, design_score])
+         decision, confidence = 'INCLUDE', min(int(avg_score * 85), 92)
+         reasoning = f"Stage 2 INCLUDE: {'; '.join(reasoning_parts)}"
+     elif max(pop_score, int_score, out_score) > 0.5:
+         decision, confidence = 'INCLUDE', min(int(max(pop_score, int_score, out_score) * 80), 88)
+         reasoning = f"Stage 2 INCLUDE: {'; '.join(reasoning_parts)}"
+     else:
+         decision, confidence = 'EXCLUDE', 60
+         reasoning = f"Stage 2 EXCLUDE: Insufficient criteria match. {'; '.join(reasoning_parts)}"
+
+     result = {
+         'decision': decision,
+         'confidence': confidence,
+         'reasoning': reasoning,
+         'stage': 2,
+         'extraction_data': extraction_scores
+     }
+
+     return result

+ # ============================================================================
+ # PROCESSING FUNCTIONS (keeping original structure)
+ # ============================================================================

+ def process_stage1(file, title_col, abstract_col, criteria, sample_size):
+     """Process Stage 1 screening with enhanced NLP"""
      try:
          df = pd.read_csv(file.name)
          if sample_size < len(df):
              df = df.head(sample_size)

              if not title and not abstract:
                  continue

+             classification = stage1_classification(title, abstract, criteria)

              result = {
                  'Study_ID': idx + 1,
                  'Title': title[:100] + "..." if len(title) > 100 else title,
                  'Stage1_Decision': classification['decision'],
                  'Stage1_Confidence': f"{classification['confidence']}%",
                  'Stage1_Reasoning': classification['reasoning'],
                  'Ready_for_Stage2': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
                  'Full_Title': title,

          results_df = pd.DataFrame(results)

+         # Summary for Stage 1
          total = len(results_df)
          included = len(results_df[results_df['Stage1_Decision'] == 'INCLUDE'])
          excluded = len(results_df[results_df['Stage1_Decision'] == 'EXCLUDE'])
          unclear = len(results_df[results_df['Stage1_Decision'] == 'UNCLEAR'])

+         model_info = "**Using Advanced Medical NLP Models**" if USE_ADVANCED_MODELS else "**Using Basic NLP Models**"

          summary = f"""
+ ## 📊 Stage 1 (Title/Abstract) Results

+ {model_info}

+ **Screening Complete:**
+ - **Total Studies:** {total}
+ - **Include for Stage 2:** {included} ({included/total*100:.1f}%)
+ - **Exclude:** {excluded} ({excluded/total*100:.1f}%)
+ - **Needs Manual Review:** {unclear} ({unclear/total*100:.1f}%)

  **Next Steps:**
  1. Review {unclear} studies marked as UNCLEAR
  2. Proceed to Stage 2 with {included} included studies
+ 3. Obtain full texts for Stage 2 screening
  """

          return summary, results_df, results_df.to_csv(index=False)

      except Exception as e:
          return f"Error: {str(e)}", None, ""

+ def process_stage2(file, title_col, abstract_col, fulltext_col, criteria, extraction_fields, sample_size):
+     """Process Stage 2 screening with data extraction"""
+     try:
+         df = pd.read_csv(file.name)
+
+         # Filter to only Stage 1 included studies if column exists
+         if 'Stage1_Decision' in df.columns:
+             df = df[df['Stage1_Decision'] == 'INCLUDE']
+
+         if sample_size < len(df):
+             df = df.head(sample_size)
+
+         # Parse extraction fields
+         extraction_dict = {}
+         if extraction_fields:
+             for line in extraction_fields.split('\n'):
+                 if ':' in line:
+                     field, terms = line.split(':', 1)
+                     extraction_dict[field.strip()] = [t.strip() for t in terms.split(',') if t.strip()]
+
+         results = []
+         for idx, row in df.iterrows():
+             title = str(row[title_col]) if pd.notna(row[title_col]) else ""
+             abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""
+             full_text = str(row[fulltext_col]) if fulltext_col and fulltext_col in df.columns and pd.notna(row[fulltext_col]) else ""
+
+             if not title and not abstract:
+                 continue
+
+             classification = stage2_classification(title, abstract, full_text, criteria, extraction_dict)
+
+             result = {
+                 'Study_ID': idx + 1,
+                 'Title': title[:100] + "..." if len(title) > 100 else title,
+                 'Stage2_Decision': classification['decision'],
+                 'Stage2_Confidence': f"{classification['confidence']}%",
+                 'Stage2_Reasoning': classification['reasoning'],
+                 'Final_Include': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
+                 'Extraction_Data': str(classification.get('extraction_data', {})),
+                 'Full_Title': title,
+                 'Full_Abstract': abstract,
+                 'Full_Text': full_text
+             }
+             results.append(result)
+
+         results_df = pd.DataFrame(results)
+
+         # Summary for Stage 2
+         total = len(results_df)
+         final_included = len(results_df[results_df['Stage2_Decision'] == 'INCLUDE'])
+         final_excluded = len(results_df[results_df['Stage2_Decision'] == 'EXCLUDE'])
+
+         summary = f"""
+ ## 📊 Stage 2 (Full-Text) Results
+
+ **Detailed Screening Complete:**
+ - **Studies Reviewed:** {total}
+ - **Final INCLUDE:** {final_included} ({final_included/total*100:.1f}%)
+ - **Final EXCLUDE:** {final_excluded} ({final_excluded/total*100:.1f}%)
+
+ **Ready for Next Steps:**
+ - **Data Extraction:** {final_included} studies
+ - **Quality Assessment:** {final_included} studies
+ - **Evidence Synthesis:** Ready to proceed
+
+ **Recommended Actions:**
+ 1. Export {final_included} included studies for detailed data extraction
+ 2. Conduct quality assessment (ROB2, ROBINS-I, etc.)
+ 3. Begin evidence synthesis and meta-analysis planning
+ """
+
+         return summary, results_df, results_df.to_csv(index=False)
+
+     except Exception as e:
+         return f"Error: {str(e)}", None, ""

+ # ============================================================================
+ # ORIGINAL INTERFACE (PRESERVED)
+ # ============================================================================
+
+ def create_interface():
+     with gr.Blocks(title="🔬 2-Stage Systematic Review AI Assistant", theme=gr.themes.Soft()) as interface:

+         gr.Markdown("""
+ # 🔬 2-Stage Systematic Review AI Assistant

+ **Complete workflow for evidence-based systematic reviews**

+ This tool supports the full 2-stage systematic review process:
+ - **Stage 1:** Title/Abstract screening (high sensitivity)
+ - **Stage 2:** Full-text screening with data extraction (high specificity)
          """)

          with gr.Tabs():

              # STAGE 1 TAB
+             with gr.TabItem("📋 Stage 1: Title/Abstract Screening"):
                  with gr.Row():
                      with gr.Column(scale=1):
                          gr.Markdown("### 📁 Upload Study Data")

                          stage1_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
                          stage1_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)

+                         stage1_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=500, value=100, step=5)

                      with gr.Column(scale=1):
+                         gr.Markdown("### 🎯 Stage 1 Criteria (Broad/Sensitive)")

                          stage1_criteria = gr.Textbox(
+                             label="Inclusion/Exclusion Criteria for Stage 1",
                              value="""POPULATION:
+ - Adult participants
+ - Human studies

  INTERVENTION:
+ - [Your intervention/exposure of interest]

  OUTCOMES:
+ - [Primary outcomes of interest]

  STUDY DESIGN:
  - Randomized controlled trials
  - Cohort studies
+ - Case-control studies

  EXCLUDE:
  - Animal studies
  - Case reports
+ - Reviews (unless relevant)""",
+                             lines=15
                          )

+                         stage1_process_btn = gr.Button("🚀 Start Stage 1 Screening", variant="primary")

                  stage1_results = gr.Markdown()
+                 stage1_table = gr.Dataframe(label="Stage 1 Results")
                  stage1_download_data = gr.Textbox(visible=False)
+                 stage1_download_btn = gr.DownloadButton(label="💾 Download Stage 1 Results", visible=False)

+             # STAGE 2 TAB
+             with gr.TabItem("📄 Stage 2: Full-Text Screening"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gr.Markdown("### 📁 Upload Stage 1 Results or Full-Text Data")
+
+                         stage2_file = gr.File(
+                             label="Upload Stage 1 Results or Studies with Full Text",
+                             file_types=[".csv"],
+                             type="filepath"
+                         )
+
+                         with gr.Row():
+                             stage2_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
+                             stage2_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)
+
+                         stage2_fulltext_col = gr.Dropdown(label="Full Text Column", choices=[], interactive=True)
+                         stage2_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=200, value=50, step=5)
+
+                     with gr.Column(scale=1):
+                         gr.Markdown("### 🎯 Stage 2 Criteria (Strict/Specific)")
+
+                         stage2_criteria = gr.Textbox(
+                             label="Detailed Inclusion/Exclusion Criteria for Stage 2",
+                             value="""POPULATION:
+ - [Specific population criteria]
+ - [Age ranges, conditions, etc.]
+
+ INTERVENTION:
+ - [Detailed intervention specifications]
+ - [Dosage, duration, delivery method]
+
+ COMPARATOR:
+ - [Control group specifications]
+ - [Placebo, standard care, etc.]
+
+ OUTCOMES:
+ - [Primary endpoint definitions]
+ - [Secondary outcomes]
+ - [Measurement methods]
+
+ STUDY DESIGN:
+ - [Minimum study quality requirements]
+ - [Follow-up duration requirements]
+
+ EXCLUDE:
+ - [Specific exclusion criteria]
+ - [Study quality thresholds]""",
+                             lines=15
+                         )
+
+                         extraction_fields = gr.Textbox(
+                             label="Data Extraction Fields (Optional)",
+                             value="""Sample Size: participants, subjects, patients, n=
+ Intervention Duration: weeks, months, days, duration
+ Primary Outcome: endpoint, primary outcome, main outcome
+ Statistical Method: analysis, statistical, regression, model
+ Risk of Bias: randomization, blinding, allocation""",
+                             lines=8
+                         )
+
+                         stage2_process_btn = gr.Button("🔍 Start Stage 2 Screening", variant="primary")
+
+                 stage2_results = gr.Markdown()
+                 stage2_table = gr.Dataframe(label="Stage 2 Results with Data Extraction")
+                 stage2_download_data = gr.Textbox(visible=False)
+                 stage2_download_btn = gr.DownloadButton(label="💾 Download Final Results", visible=False)
+
+             # WORKFLOW GUIDANCE TAB
+             with gr.TabItem("📚 Systematic Review Workflow"):
                  gr.Markdown("""
+ ## 🔄 Complete 2-Stage Systematic Review Process
+
+ ### **Stage 1: Title/Abstract Screening**
+ **Objective:** High sensitivity screening to identify potentially relevant studies
+
+ **Process:**
+ 1. Upload search results from multiple databases (PubMed, Embase, etc.)
+ 2. Define broad inclusion/exclusion criteria
+ 3. AI screens titles/abstracts with high sensitivity
+ 4. Manually review "UNCLEAR" classifications
+ 5. Export studies marked for inclusion to Stage 2
+
+ **Criteria Guidelines:**
+ - Use broad terms to capture all potentially relevant studies
+ - Focus on key PICOS elements (Population, Intervention, Outcomes)
+ - Err on the side of inclusion when uncertain
+
+ ### **Stage 2: Full-Text Screening**
+ **Objective:** High specificity screening with detailed data extraction
+
+ **Process:**
+ 1. Upload Stage 1 results or add full-text content
+ 2. Define strict, specific inclusion/exclusion criteria
+ 3. AI performs detailed full-text analysis
+ 4. Extract key data points for synthesis
+ 5. Export final included studies for meta-analysis
+
+ **Criteria Guidelines:**
+ - Use specific, measurable criteria
+ - Include detailed PICOS specifications
+ - Define minimum quality thresholds
+ - Specify exact outcome measurements needed
+
+ ### **Quality Assurance Recommendations:**
+
+ **For Stage 1:**
+ - Manual review of 10-20% of AI decisions
+ - Inter-rater reliability testing with a subset
+ - Calibration exercises among reviewers
+
+ **For Stage 2:**
+ - Manual validation of all AI INCLUDE decisions
+ - Detailed reason documentation for exclusions
+ - Data extraction verification by a second reviewer
+
+ ### **After 2-Stage Screening:**
+
+ 1. **Data Extraction:** Extract detailed study characteristics
+ 2. **Quality Assessment:** Apply ROB2, ROBINS-I, or other tools
+ 3. **Evidence Synthesis:** Qualitative synthesis and meta-analysis
+ 4. **GRADE Assessment:** Evaluate certainty of evidence
+ 5. **Reporting:** Follow PRISMA guidelines
+
+ ### **Best Practices:**
+
+ - **Document everything:** Keep detailed logs of decisions and criteria
+ - **Validate AI decisions:** Use AI as assistance, not replacement
+ - **Follow guidelines:** Adhere to Cochrane and PRISMA standards
+ - **Test criteria:** Pilot with known studies before full screening
+ - **Multiple reviewers:** Have disagreements resolved by a third reviewer
+
+ ### **When to Use Each Stage:**
+
+ **Use Stage 1 when:**
+ - Starting with large search results (>1000 studies)
+ - You need to quickly filter irrelevant studies
+ - Working with title/abstract data only
+
+ **Use Stage 2 when:**
+ - You have full-text access to studies
+ - You need detailed inclusion/exclusion assessment
+ - Ready for data extraction
+ - Preparing for meta-analysis
+
+ ### **Advanced NLP Features:**
+
+ This tool now includes advanced medical NLP models when available:
+ - **PubMedBERT** for medical text understanding
+ - **Cross-encoders** for accurate semantic matching
+ - **Zero-shot classification** for flexible criteria
+ - **Medical term expansion** for comprehensive matching
+
+ The system automatically detects and uses advanced models when available,
+ falling back to basic models if needed.
                  """)

+         # Event handlers for file uploads and column detection
          def update_stage1_columns(file):
              if file is None:
                  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])

              except:
                  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])

+         def update_stage2_columns(file):
+             if file is None:
+                 return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
+             try:
+                 df = pd.read_csv(file.name)
+                 columns = df.columns.tolist()
+                 title_col = next((col for col in columns if 'title' in col.lower()), columns[0] if columns else None)
+                 abstract_col = next((col for col in columns if 'abstract' in col.lower()), columns[1] if len(columns) > 1 else None)
+                 fulltext_col = next((col for col in columns if any(term in col.lower() for term in ['full_text', 'fulltext', 'text', 'content'])), None)
+                 return (gr.Dropdown(choices=columns, value=title_col),
+                         gr.Dropdown(choices=columns, value=abstract_col),
+                         gr.Dropdown(choices=columns, value=fulltext_col))
+             except:
+                 return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
+
+         # Event bindings
+         stage1_file.change(fn=update_stage1_columns, inputs=[stage1_file], outputs=[stage1_title_col, stage1_abstract_col])
+         stage2_file.change(fn=update_stage2_columns, inputs=[stage2_file], outputs=[stage2_title_col, stage2_abstract_col, stage2_fulltext_col])

+         def process_stage1_with_download(*args):
+             summary, table, csv_data = process_stage1(*args)
+             return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))
+
+         def process_stage2_with_download(*args):
+             summary, table, csv_data = process_stage2(*args)
              return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))

          stage1_process_btn.click(
+             fn=process_stage1_with_download,
              inputs=[stage1_file, stage1_title_col, stage1_abstract_col, stage1_criteria, stage1_sample],
              outputs=[stage1_results, stage1_table, stage1_download_data, stage1_download_btn]
          )

+         stage2_process_btn.click(
+             fn=process_stage2_with_download,
+             inputs=[stage2_file, stage2_title_col, stage2_abstract_col, stage2_fulltext_col, stage2_criteria, extraction_fields, stage2_sample],
+             outputs=[stage2_results, stage2_table, stage2_download_data, stage2_download_btn]
+         )
+
+         stage1_download_btn.click(lambda data: data, inputs=[stage1_download_data], outputs=[gr.File()])
+         stage2_download_btn.click(lambda data: data, inputs=[stage2_download_data], outputs=[gr.File()])

      return interface

  if __name__ == "__main__":
+     interface = create_interface()
      interface.launch()
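Note: for anyone reproducing this Space, a minimal requirements.txt sketch inferred from the imports in this commit (package names only; pinned versions are not part of the diff and would be assumptions):

    gradio
    pandas
    numpy
    torch
    transformers
    sentence-transformers
    requests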