Spaces:

HebaElshimy
/

systematic-reviews

Sleeping

App Files Files Community

HebaElshimy committed 26 days ago

Commit

fee9667

verified ·

1 Parent(s): f2eb705

Upload 2 files

Browse files

Files changed (2) hide show

app.py +580 -455
requirements.txt +21 -6

app.py CHANGED Viewed

@@ -1,317 +1,532 @@
 import gradio as gr
 import pandas as pd
-import requests
-import json
-from transformers import pipeline
 import torch
-import time
-from typing import List, Dict, Tuple
 import re
-import numpy as np
-# Initialize models
-print("Loading models...")
-classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-similarity_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
-print("Models loaded successfully!")
-def cosine_similarity(a, b):
-    """Simple cosine similarity calculation"""
-    dot_product = np.dot(a, b)
-    norm_a = np.linalg.norm(a)
-    norm_b = np.linalg.norm(b)
-    return dot_product / (norm_a * norm_b) if norm_a > 0 and norm_b > 0 else 0
-def get_text_embedding(text):
-    """Get text embedding using the similarity model"""
-    try:
-        embeddings = similarity_model(text)
-        return np.mean(embeddings[0], axis=0)
-    except:
-        return np.zeros(384)
-def parse_criteria(criteria_text: str, stage: str = "stage1") -> Dict:
-    """Parse criteria differently for Stage 1 vs Stage 2"""
-    criteria = {
-        'population': [], 'intervention': [], 'comparator': [], 'outcomes': [],
-        'study_design': [], 'include_general': [], 'exclude_general': []
-    }
-    lines = criteria_text.lower().split('\n')
-    current_section = None
-    for line in lines:
-        line = line.strip()
-        if not line:
-            continue
-        # Detect section headers
-        if any(keyword in line for keyword in ['population:', 'participants:', 'subjects:']):
-            current_section = 'population'
-        elif any(keyword in line for keyword in ['intervention:', 'exposure:', 'treatment:']):
-            current_section = 'intervention'
-        elif any(keyword in line for keyword in ['comparator:', 'control:', 'comparison:']):
-            current_section = 'comparator'
-        elif any(keyword in line for keyword in ['outcomes:', 'endpoint:', 'results:']):
-            current_section = 'outcomes'
-        elif any(keyword in line for keyword in ['study design:', 'design:', 'study type:']):
-            current_section = 'study_design'
-        elif 'include' in line and ':' in line:
-            current_section = 'include_general'
-        elif 'exclude' in line and ':' in line:
-            current_section = 'exclude_general'
-        elif line.startswith('-') and current_section:
-            term = line[1:].strip()
-            if term and len(term) > 2:
-                criteria[current_section].append(term)
-        elif current_section and not any(keyword in line for keyword in ['include', 'exclude', 'population', 'intervention', 'comparator', 'outcomes', 'study']):
-            terms = [t.strip() for t in line.split(',') if t.strip() and len(t.strip()) > 2]
-            criteria[current_section].extend(terms)
-    return criteria
-def semantic_similarity_score(study_text: str, criteria_terms: List[str]) -> Tuple[float, str]:
-    """Calculate semantic similarity between study and criteria terms"""
-    if not criteria_terms:
-        return 0.0, ""
-    study_embedding = get_text_embedding(study_text)
-    best_score, best_match = 0.0, ""
-    for term in criteria_terms:
-        term_embedding = get_text_embedding(term)
-        similarity = cosine_similarity(study_embedding, term_embedding)
-        if similarity > best_score:
-            best_score, best_match = similarity, term
-    return best_score, best_match
-def stage1_classification(title: str, abstract: str, criteria_text: str) -> Dict:
-    """Stage 1: Broad title/abstract screening with high sensitivity"""
-    study_text = f"{title} {abstract}".lower()
-    if len(study_text.strip()) < 20:
-        return {'decision': 'UNCLEAR', 'confidence': 20, 'reasoning': 'Insufficient text', 'stage': 1}
-    criteria = parse_criteria(criteria_text, "stage1")
-    # Calculate PICOS scores with lower thresholds for Stage 1
-    pop_score, pop_match = semantic_similarity_score(study_text, criteria['population'])
-    int_score, int_match = semantic_similarity_score(study_text, criteria['intervention'])
-    out_score, out_match = semantic_similarity_score(study_text, criteria['outcomes'])
-    design_score, design_match = semantic_similarity_score(study_text, criteria['study_design'])
-    inc_score, inc_match = semantic_similarity_score(study_text, criteria['include_general'])
-    exc_score, exc_match = semantic_similarity_score(study_text, criteria['exclude_general'])
-    reasoning_parts = []
-    if pop_score > 0.25: reasoning_parts.append(f"Population: '{pop_match}' ({pop_score:.2f})")
-    if int_score > 0.25: reasoning_parts.append(f"Intervention: '{int_match}' ({int_score:.2f})")
-    if out_score > 0.25: reasoning_parts.append(f"Outcome: '{out_match}' ({out_score:.2f})")
-    if design_score > 0.25: reasoning_parts.append(f"Design: '{design_match}' ({design_score:.2f})")
-    if inc_score > 0.25: reasoning_parts.append(f"Include: '{inc_match}' ({inc_score:.2f})")
-    if exc_score > 0.25: reasoning_parts.append(f"Exclude: '{exc_match}' ({exc_score:.2f})")
-    # Stage 1 Decision Logic (High Sensitivity)
-    if exc_score > 0.35:  # Clear exclusion
-        decision, confidence = 'EXCLUDE', min(int(exc_score * 100), 90)
-        reasoning = f"Stage 1 EXCLUDE: {'; '.join(reasoning_parts)}"
-    elif sum([pop_score > 0.25, int_score > 0.25, out_score > 0.25]) >= 1:  # Any relevant match
-        avg_score = np.mean([s for s in [pop_score, int_score, out_score, design_score, inc_score] if s > 0.25])
-        decision, confidence = 'INCLUDE', min(int(avg_score * 75), 80)
-        reasoning = f"Stage 1 INCLUDE: {'; '.join(reasoning_parts)}"
-    else:
-        decision, confidence = 'UNCLEAR', 40
-        reasoning = f"Stage 1 UNCLEAR: {'; '.join(reasoning_parts) if reasoning_parts else 'No clear matches'}"
-    return {'decision': decision, 'confidence': confidence, 'reasoning': reasoning, 'stage': 1}
-def stage2_classification(title: str, abstract: str, full_text: str, criteria_text: str,
-                         data_extraction_fields: Dict = None) -> Dict:
-    """Stage 2: Detailed full-text screening with data extraction"""
-    # Combine all available text
-    study_text = f"{title} {abstract} {full_text}".lower()
-    if len(study_text.strip()) < 50:
-        return {'decision': 'UNCLEAR', 'confidence': 25, 'reasoning': 'Insufficient full text', 'stage': 2}
-    criteria = parse_criteria(criteria_text, "stage2")
-    # More stringent scoring for Stage 2
-    pop_score, pop_match = semantic_similarity_score(study_text, criteria['population'])
-    int_score, int_match = semantic_similarity_score(study_text, criteria['intervention'])
-    comp_score, comp_match = semantic_similarity_score(study_text, criteria['comparator'])
-    out_score, out_match = semantic_similarity_score(study_text, criteria['outcomes'])
-    design_score, design_match = semantic_similarity_score(study_text, criteria['study_design'])
-    exc_score, exc_match = semantic_similarity_score(study_text, criteria['exclude_general'])
-    # Data extraction scoring
-    extraction_scores = {}
-    if data_extraction_fields:
-        for field, terms in data_extraction_fields.items():
-            if terms:
-                field_score, field_match = semantic_similarity_score(study_text, terms)
-                extraction_scores[field] = {'score': field_score, 'match': field_match}
-    reasoning_parts = []
-    if pop_score > 0.3: reasoning_parts.append(f"Population: '{pop_match}' ({pop_score:.2f})")
-    if int_score > 0.3: reasoning_parts.append(f"Intervention: '{int_match}' ({int_score:.2f})")
-    if comp_score > 0.3: reasoning_parts.append(f"Comparator: '{comp_match}' ({comp_score:.2f})")
-    if out_score > 0.3: reasoning_parts.append(f"Outcome: '{out_match}' ({out_score:.2f})")
-    if design_score > 0.3: reasoning_parts.append(f"Design: '{design_match}' ({design_score:.2f})")
-    if exc_score > 0.3: reasoning_parts.append(f"Exclusion: '{exc_match}' ({exc_score:.2f})")
-    # Stage 2 Decision Logic (High Specificity)
-    if exc_score > 0.4:  # Strong exclusion
-        decision, confidence = 'EXCLUDE', min(int(exc_score * 100), 95)
-        reasoning = f"Stage 2 EXCLUDE: {'; '.join(reasoning_parts)}"
-    elif sum([pop_score > 0.4, int_score > 0.4, out_score > 0.4, design_score > 0.4]) >= 3:  # Multiple strong matches
-        avg_score = np.mean([pop_score, int_score, comp_score, out_score, design_score])
-        decision, confidence = 'INCLUDE', min(int(avg_score * 85), 92)
-        reasoning = f"Stage 2 INCLUDE: {'; '.join(reasoning_parts)}"
-    elif max(pop_score, int_score, out_score) > 0.5:  # Very strong single match
-        decision, confidence = 'INCLUDE', min(int(max(pop_score, int_score, out_score) * 80), 88)
-        reasoning = f"Stage 2 INCLUDE: {'; '.join(reasoning_parts)}"
-    else:
-        decision, confidence = 'EXCLUDE', 60
-        reasoning = f"Stage 2 EXCLUDE: Insufficient criteria match. {'; '.join(reasoning_parts)}"
-    result = {
-        'decision': decision,
-        'confidence': confidence,
-        'reasoning': reasoning,
-        'stage': 2,
-        'extraction_data': extraction_scores
-    }
-    return result
-def process_stage1(file, title_col, abstract_col, criteria, sample_size):
-    """Process Stage 1 screening"""
-    try:
-        df = pd.read_csv(file.name)
-        if sample_size < len(df):
-            df = df.head(sample_size)
-        results = []
-        for idx, row in df.iterrows():
-            title = str(row[title_col]) if pd.notna(row[title_col]) else ""
-            abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""
-            if not title and not abstract:
                 continue
-            classification = stage1_classification(title, abstract, criteria)
-            result = {
-                'Study_ID': idx + 1,
-                'Title': title[:100] + "..." if len(title) > 100 else title,
-                'Stage1_Decision': classification['decision'],
-                'Stage1_Confidence': f"{classification['confidence']}%",
-                'Stage1_Reasoning': classification['reasoning'],
-                'Ready_for_Stage2': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
-                'Full_Title': title,
-                'Full_Abstract': abstract
-            }
-            results.append(result)
-        results_df = pd.DataFrame(results)
-        # Summary for Stage 1
-        total = len(results_df)
-        included = len(results_df[results_df['Stage1_Decision'] == 'INCLUDE'])
-        excluded = len(results_df[results_df['Stage1_Decision'] == 'EXCLUDE'])
-        unclear = len(results_df[results_df['Stage1_Decision'] == 'UNCLEAR'])
-        summary = f"""
-## 📊 Stage 1 (Title/Abstract) Results
-**Screening Complete:**
-- **Total Studies:** {total}
-- **Include for Stage 2:** {included} ({included/total*100:.1f}%)
-- **Exclude:** {excluded} ({excluded/total*100:.1f}%)
-- **Needs Manual Review:** {unclear} ({unclear/total*100:.1f}%)
-**Next Steps:**
-1. Review {unclear} studies marked as UNCLEAR
-2. Proceed to Stage 2 with {included} included studies
-3. Obtain full texts for Stage 2 screening
-        """
-        return summary, results_df, results_df.to_csv(index=False)
-    except Exception as e:
-        return f"Error: {str(e)}", None, ""
-def process_stage2(file, title_col, abstract_col, fulltext_col, criteria, extraction_fields, sample_size):
-    """Process Stage 2 screening with data extraction"""
-    try:
-        df = pd.read_csv(file.name)
-        # Filter to only Stage 1 included studies if column exists
-        if 'Stage1_Decision' in df.columns:
-            df = df[df['Stage1_Decision'] == 'INCLUDE']
         if sample_size < len(df):
             df = df.head(sample_size)
-        # Parse extraction fields
-        extraction_dict = {}
-        if extraction_fields:
-            for line in extraction_fields.split('\n'):
-                if ':' in line:
-                    field, terms = line.split(':', 1)
-                    extraction_dict[field.strip()] = [t.strip() for t in terms.split(',') if t.strip()]
         results = []
         for idx, row in df.iterrows():
             title = str(row[title_col]) if pd.notna(row[title_col]) else ""
             abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""
-            full_text = str(row[fulltext_col]) if fulltext_col and fulltext_col in df.columns and pd.notna(row[fulltext_col]) else ""
             if not title and not abstract:
                 continue
-            classification = stage2_classification(title, abstract, full_text, criteria, extraction_dict)
             result = {
                 'Study_ID': idx + 1,
                 'Title': title[:100] + "..." if len(title) > 100 else title,
-                'Stage2_Decision': classification['decision'],
-                'Stage2_Confidence': f"{classification['confidence']}%",
-                'Stage2_Reasoning': classification['reasoning'],
-                'Final_Include': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
-                'Extraction_Data': str(classification.get('extraction_data', {})),
                 'Full_Title': title,
-                'Full_Abstract': abstract,
-                'Full_Text': full_text
             }
             results.append(result)
         results_df = pd.DataFrame(results)
-        # Summary for Stage 2
         total = len(results_df)
-        final_included = len(results_df[results_df['Stage2_Decision'] == 'INCLUDE'])
-        final_excluded = len(results_df[results_df['Stage2_Decision'] == 'EXCLUDE'])
         summary = f"""
-## 📊 Stage 2 (Full-Text) Results
-**Detailed Screening Complete:**
-- **Studies Reviewed:** {total}
-- **Final INCLUDE:** {final_included} ({final_included/total*100:.1f}%)
-- **Final EXCLUDE:** {final_excluded} ({final_excluded/total*100:.1f}%)
-**Ready for Next Steps:**
-- **Data Extraction:** {final_included} studies
-- **Quality Assessment:** {final_included} studies
-- **Evidence Synthesis:** Ready to proceed
-**Recommended Actions:**
-1. Export {final_included} included studies for detailed data extraction
-2. Conduct quality assessment (ROB2, ROBINS-I, etc.)
-3. Begin evidence synthesis and meta-analysis planning
         """
         return summary, results_df, results_df.to_csv(index=False)
@@ -319,23 +534,28 @@ def process_stage2(file, title_col, abstract_col, fulltext_col, criteria, extrac
     except Exception as e:
         return f"Error: {str(e)}", None, ""
-def create_interface():
-    with gr.Blocks(title="🔬 2-Stage Systematic Review AI Assistant", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
-        # 🔬 2-Stage Systematic Review AI Assistant
-        **Complete workflow for evidence-based systematic reviews**
-        This tool supports the full 2-stage systematic review process:
-        - **Stage 1:** Title/Abstract screening (high sensitivity)
-        - **Stage 2:** Full-text screening with data extraction (high specificity)
         """)
         with gr.Tabs():
             # STAGE 1 TAB
-            with gr.TabItem("📋 Stage 1: Title/Abstract Screening"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         gr.Markdown("### 📁 Upload Study Data")
@@ -350,189 +570,113 @@ def create_interface():
                             stage1_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
                             stage1_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)
-                        stage1_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=500, value=100, step=5)
                     with gr.Column(scale=1):
-                        gr.Markdown("### 🎯 Stage 1 Criteria (Broad/Sensitive)")
                         stage1_criteria = gr.Textbox(
-                            label="Inclusion/Exclusion Criteria for Stage 1",
                             value="""POPULATION:
-- Adult participants
-- Human studies
 INTERVENTION:
-- [Your intervention/exposure of interest]
 OUTCOMES:
-- [Primary outcomes of interest]
 STUDY DESIGN:
 - Randomized controlled trials
 - Cohort studies
-- Case-control studies
 EXCLUDE:
 - Animal studies
 - Case reports
-- Reviews (unless relevant)""",
-                            lines=15
                         )
-                stage1_process_btn = gr.Button("🚀 Start Stage 1 Screening", variant="primary")
                 stage1_results = gr.Markdown()
-                stage1_table = gr.Dataframe(label="Stage 1 Results")
                 stage1_download_data = gr.Textbox(visible=False)
-                stage1_download_btn = gr.DownloadButton(label="💾 Download Stage 1 Results", visible=False)
-            # STAGE 2 TAB
-            with gr.TabItem("📄 Stage 2: Full-Text Screening"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        gr.Markdown("### 📁 Upload Stage 1 Results or Full-Text Data")
-                        stage2_file = gr.File(
-                            label="Upload Stage 1 Results or Studies with Full Text",
-                            file_types=[".csv"],
-                            type="filepath"
-                        )
-                        with gr.Row():
-                            stage2_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
-                            stage2_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)
-                        stage2_fulltext_col = gr.Dropdown(label="Full Text Column", choices=[], interactive=True)
-                        stage2_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=200, value=50, step=5)
-                    with gr.Column(scale=1):
-                        gr.Markdown("### 🎯 Stage 2 Criteria (Strict/Specific)")
-                        stage2_criteria = gr.Textbox(
-                            label="Detailed Inclusion/Exclusion Criteria for Stage 2",
-                            value="""POPULATION:
-- [Specific population criteria]
-- [Age ranges, conditions, etc.]
-INTERVENTION:
-- [Detailed intervention specifications]
-- [Dosage, duration, delivery method]
-COMPARATOR:
-- [Control group specifications]
-- [Placebo, standard care, etc.]
-OUTCOMES:
-- [Primary endpoint definitions]
-- [Secondary outcomes]
-- [Measurement methods]
-STUDY DESIGN:
-- [Minimum study quality requirements]
-- [Follow-up duration requirements]
-EXCLUDE:
-- [Specific exclusion criteria]
-- [Study quality thresholds]""",
-                            lines=15
-                        )
-                        extraction_fields = gr.Textbox(
-                            label="Data Extraction Fields (Optional)",
-                            value="""Sample Size: participants, subjects, patients, n=
-Intervention Duration: weeks, months, days, duration
-Primary Outcome: endpoint, primary outcome, main outcome
-Statistical Method: analysis, statistical, regression, model
-Risk of Bias: randomization, blinding, allocation""",
-                            lines=8
-                        )
-                stage2_process_btn = gr.Button("🔍 Start Stage 2 Screening", variant="primary")
-                stage2_results = gr.Markdown()
-                stage2_table = gr.Dataframe(label="Stage 2 Results with Data Extraction")
-                stage2_download_data = gr.Textbox(visible=False)
-                stage2_download_btn = gr.DownloadButton(label="💾 Download Final Results", visible=False)
-            # WORKFLOW GUIDANCE TAB
-            with gr.TabItem("📚 Systematic Review Workflow"):
                 gr.Markdown("""
-                ## 🔄 Complete 2-Stage Systematic Review Process
-                ### **Stage 1: Title/Abstract Screening**
-                **Objective:** High sensitivity screening to identify potentially relevant studies
-                **Process:**
-                1. Upload search results from multiple databases (PubMed, Embase, etc.)
-                2. Define broad inclusion/exclusion criteria
-                3. AI screens titles/abstracts with high sensitivity
-                4. Manually review "UNCLEAR" classifications
-                5. Export studies marked for inclusion to Stage 2
-                **Criteria Guidelines:**
-                - Use broad terms to capture all potentially relevant studies
-                - Focus on key PICOS elements (Population, Intervention, Outcomes)
-                - Err on the side of inclusion when uncertain
-                ### **Stage 2: Full-Text Screening**
-                **Objective:** High specificity screening with detailed data extraction
-                **Process:**
-                1. Upload Stage 1 results or add full-text content
-                2. Define strict, specific inclusion/exclusion criteria
-                3. AI performs detailed full-text analysis
-                4. Extract key data points for synthesis
-                5. Export final included studies for meta-analysis
-                **Criteria Guidelines:**
-                - Use specific, measurable criteria
-                - Include detailed PICOS specifications
-                - Define minimum quality thresholds
-                - Specify exact outcome measurements needed
-                ### **Quality Assurance Recommendations:**
-                **For Stage 1:**
-                - Manual review of 10-20% of AI decisions
-                - Inter-rater reliability testing with subset
-                - Calibration exercises among reviewers
-                **For Stage 2:**
-                - Manual validation of all AI INCLUDE decisions
-                - Detailed reason documentation for exclusions
-                - Data extraction verification by second reviewer
-                ### **After 2-Stage Screening:**
-                1. **Data Extraction:** Extract detailed study characteristics
-                2. **Quality Assessment:** Apply ROB2, ROBINS-I, or other tools
-                3. **Evidence Synthesis:** Qualitative synthesis and meta-analysis
-                4. **GRADE Assessment:** Evaluate certainty of evidence
-                5. **Reporting:** Follow PRISMA guidelines
-                ### **Best Practices:**
-                - **Document everything:** Keep detailed logs of decisions and criteria
-                - **Validate AI decisions:** Use AI as assistance, not replacement
-                - **Follow guidelines:** Adhere to Cochrane and PRISMA standards
-                - **Test criteria:** Pilot with known studies before full screening
-                - **Multiple reviewers:** Have disagreements resolved by third reviewer
-                ### **When to Use Each Stage:**
-                **Use Stage 1 when:**
-                - Starting with large search results (>1000 studies)
-                - Need to quickly filter irrelevant studies
-                - Working with title/abstract data only
-                **Use Stage 2 when:**
-                - Have full-text access to studies
-                - Need detailed inclusion/exclusion assessment
-                - Ready for data extraction
-                - Preparing for meta-analysis
                 """)
-        # Event handlers for file uploads and column detection
         def update_stage1_columns(file):
             if file is None:
                 return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
@@ -545,50 +689,31 @@ Risk of Bias: randomization, blinding, allocation""",
             except:
                 return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
-        def update_stage2_columns(file):
-            if file is None:
-                return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
-            try:
-                df = pd.read_csv(file.name)
-                columns = df.columns.tolist()
-                title_col = next((col for col in columns if 'title' in col.lower()), columns[0] if columns else None)
-                abstract_col = next((col for col in columns if 'abstract' in col.lower()), columns[1] if len(columns) > 1 else None)
-                fulltext_col = next((col for col in columns if any(term in col.lower() for term in ['full_text', 'fulltext', 'text', 'content'])), None)
-                return (gr.Dropdown(choices=columns, value=title_col),
-                       gr.Dropdown(choices=columns, value=abstract_col),
-                       gr.Dropdown(choices=columns, value=fulltext_col))
-            except:
-                return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
-        # Event bindings
-        stage1_file.change(fn=update_stage1_columns, inputs=[stage1_file], outputs=[stage1_title_col, stage1_abstract_col])
-        stage2_file.change(fn=update_stage2_columns, inputs=[stage2_file], outputs=[stage2_title_col, stage2_abstract_col, stage2_fulltext_col])
-        def process_stage1_with_download(*args):
-            summary, table, csv_data = process_stage1(*args)
-            return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))
-        def process_stage2_with_download(*args):
-            summary, table, csv_data = process_stage2(*args)
             return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))
         stage1_process_btn.click(
-            fn=process_stage1_with_download,
             inputs=[stage1_file, stage1_title_col, stage1_abstract_col, stage1_criteria, stage1_sample],
             outputs=[stage1_results, stage1_table, stage1_download_data, stage1_download_btn]
         )
-        stage2_process_btn.click(
-            fn=process_stage2_with_download,
-            inputs=[stage2_file, stage2_title_col, stage2_abstract_col, stage2_fulltext_col, stage2_criteria, extraction_fields, stage2_sample],
-            outputs=[stage2_results, stage2_table, stage2_download_data, stage2_download_btn]
         )
-        stage1_download_btn.click(lambda data: data, inputs=[stage1_download_data], outputs=[gr.File()])
-        stage2_download_btn.click(lambda data: data, inputs=[stage2_download_data], outputs=[gr.File()])
     return interface
 if __name__ == "__main__":
-    interface = create_interface()
-    interface.launch()

 import gradio as gr
 import pandas as pd
+import numpy as np
 import torch
+from transformers import (
+    pipeline,
+    AutoTokenizer,
+    AutoModel,
+    AutoModelForSequenceClassification
+)
+from sentence_transformers import SentenceTransformer, CrossEncoder
 import re
+from typing import List, Dict, Tuple, Optional
+import warnings
+warnings.filterwarnings('ignore')
+# ============================================================================
+# ADVANCED MODEL INITIALIZATION
+# ============================================================================
+class AdvancedMedicalScreener:
+    def __init__(self):
+        """Initialize all advanced NLP models for medical literature screening"""
+        print("🚀 Initializing Advanced Medical Screening Models...")
+        # 1. Biomedical language model for embeddings
+        print("Loading PubMedBERT for medical text understanding...")
+        self.pubmed_tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
+        self.pubmed_model = AutoModel.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
+        # 2. Cross-encoder for accurate semantic similarity
+        print("Loading Cross-Encoder for semantic matching...")
+        self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', max_length=512)
+        # 3. Zero-shot classifier for criteria matching
+        print("Loading Zero-Shot Classifier...")
+        self.zero_shot = pipeline(
+            "zero-shot-classification",
+            model="facebook/bart-large-mnli",
+            device=0 if torch.cuda.is_available() else -1
+        )
+        # 4. Sentence transformer for fast similarity
+        print("Loading Sentence Transformer...")
+        self.sentence_model = SentenceTransformer('pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb')
+        # 5. Medical NER for entity extraction (optional, lightweight)
+        print("Loading Medical NER model...")
+        try:
+            self.ner_pipeline = pipeline(
+                "ner",
+                model="dmis-lab/biobert-base-cased-v1.2",
+                aggregation_strategy="simple"
+            )
+        except:
+            self.ner_pipeline = None
+            print("Note: Medical NER model not available, using fallback")
+        print("✅ All models loaded successfully!")
+        # Medical terminology expansions
+        self.medical_synonyms = {
+            'rct': ['randomized controlled trial', 'randomised controlled trial', 'randomized clinical trial'],
+            'pain': ['pain', 'nociception', 'analgesia', 'hyperalgesia', 'allodynia', 'neuropathic pain',
+                    'chronic pain', 'acute pain', 'postoperative pain', 'pain management'],
+            'surgery': ['surgery', 'surgical', 'operation', 'operative', 'postoperative', 'perioperative',
+                       'preoperative', 'surgical procedure', 'surgical intervention'],
+            'study design': ['study design', 'trial design', 'research design', 'methodology',
+                           'randomized', 'controlled', 'cohort', 'case-control', 'cross-sectional',
+                           'prospective', 'retrospective', 'observational', 'experimental'],
+            'systematic review': ['systematic review', 'meta-analysis', 'meta analysis', 'evidence synthesis'],
+            'case report': ['case report', 'case study', 'case series', 'case presentation'],
+            'clinical trial': ['clinical trial', 'clinical study', 'trial', 'intervention study'],
+        }
+        # Study design hierarchy for classification
+        self.study_designs = {
+            'high_quality': ['randomized controlled trial', 'systematic review', 'meta-analysis'],
+            'moderate_quality': ['cohort study', 'case-control study', 'controlled trial'],
+            'low_quality': ['case report', 'case series', 'opinion', 'editorial'],
+            'observational': ['cohort', 'case-control', 'cross-sectional', 'observational'],
+            'experimental': ['randomized', 'experimental', 'intervention', 'trial']
+        }
+    def get_pubmed_embedding(self, text: str) -> np.ndarray:
+        """Get PubMedBERT embedding for medical text"""
+        inputs = self.pubmed_tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512,
+            padding=True
+        )
+        with torch.no_grad():
+            outputs = self.pubmed_model(**inputs)
+            # Use CLS token embedding
+            embedding = outputs.last_hidden_state[:, 0, :].numpy()
+        return embedding.squeeze()
+    def expand_medical_terms(self, term: str) -> List[str]:
+        """Expand medical terms with synonyms and related concepts"""
+        term_lower = term.lower()
+        expanded = [term]
+        # Check for known medical synonyms
+        for key, synonyms in self.medical_synonyms.items():
+            if key in term_lower or any(syn in term_lower for syn in synonyms):
+                expanded.extend(synonyms)
+        # Add variations
+        if 'pain' in term_lower:
+            expanded.extend(['analgesic', 'nociceptive', 'painful'])
+        if 'surgery' in term_lower or 'surgical' in term_lower:
+            expanded.extend(['surgeon', 'resection', 'excision', 'incision'])
+        return list(set(expanded))
+    def parse_advanced_criteria(self, criteria_text: str) -> Dict:
+        """Advanced parsing of inclusion/exclusion criteria with medical understanding"""
+        criteria = {
+            'population': [],
+            'intervention': [],
+            'comparator': [],
+            'outcomes': [],
+            'study_design': [],
+            'include_general': [],
+            'exclude_general': [],
+            'pain_related': [],
+            'surgery_related': []
+        }
+        lines = criteria_text.split('\n')
+        current_section = None
+        is_exclusion = False
+        for line in lines:
+            line_clean = line.strip()
+            line_lower = line_clean.lower()
+            if not line_clean:
                 continue
+            # Detect exclusion context
+            if 'exclude' in line_lower:
+                is_exclusion = True
+                current_section = 'exclude_general'
+            elif 'include' in line_lower:
+                is_exclusion = False
+                current_section = 'include_general'
+            # Detect PICOS sections
+            elif any(term in line_lower for term in ['population:', 'participants:', 'patients:']):
+                current_section = 'population'
+            elif any(term in line_lower for term in ['intervention:', 'exposure:', 'treatment:']):
+                current_section = 'intervention'
+            elif any(term in line_lower for term in ['comparator:', 'control:', 'comparison:']):
+                current_section = 'comparator'
+            elif any(term in line_lower for term in ['outcome:', 'endpoint:', 'measure:']):
+                current_section = 'outcomes'
+            elif any(term in line_lower for term in ['study design:', 'design:', 'study type:', 'methodology:']):
+                current_section = 'study_design'
+            # Special detection for pain and surgery
+            elif 'pain' in line_lower:
+                current_section = 'pain_related'
+            elif any(term in line_lower for term in ['surgery', 'surgical', 'operation']):
+                current_section = 'surgery_related'
+            # Extract criteria items
+            elif current_section:
+                # Handle bullet points or dashes
+                if line_clean.startswith(('-', '•', '*', '·')):
+                    item = line_clean[1:].strip()
+                    if item:
+                        # Expand medical terms
+                        expanded_items = self.expand_medical_terms(item)
+                        criteria[current_section].extend(expanded_items)
+                # Handle comma-separated items
+                elif ',' in line_clean and ':' not in line_clean:
+                    items = [i.strip() for i in line_clean.split(',')]
+                    for item in items:
+                        if item and len(item) > 2:
+                            expanded_items = self.expand_medical_terms(item)
+                            criteria[current_section].extend(expanded_items)
+                # Handle single items
+                elif line_clean and not any(marker in line_lower for marker in [':', 'population', 'intervention', 'outcome']):
+                    expanded_items = self.expand_medical_terms(line_clean)
+                    criteria[current_section].extend(expanded_items)
+        # Remove duplicates
+        for key in criteria:
+            criteria[key] = list(set(criteria[key]))
+        return criteria
+    def cross_encoder_score(self, text: str, criteria: str) -> float:
+        """Calculate cross-encoder similarity score"""
+        try:
+            score = self.cross_encoder.predict([[text, criteria]])
+            # Normalize to 0-1 range
+            return float(1 / (1 + np.exp(-score[0])))
+        except:
+            return 0.0
+    def zero_shot_classify(self, text: str, labels: List[str], hypothesis_template: str = "This study is about {}") -> Dict:
+        """Perform zero-shot classification with custom hypothesis"""
+        if not labels:
+            return {}
+        try:
+            result = self.zero_shot(
+                text,
+                candidate_labels=labels,
+                hypothesis_template=hypothesis_template,
+                multi_label=True
+            )
+            # Convert to dictionary with scores
+            scores = {}
+            for label, score in zip(result['labels'], result['scores']):
+                scores[label] = score
+            return scores
+        except:
+            return {}
+    def evaluate_study_design(self, text: str) -> Dict:
+        """Evaluate study design quality and type"""
+        design_labels = [
+            'randomized controlled trial',
+            'systematic review',
+            'meta-analysis',
+            'cohort study',
+            'case-control study',
+            'cross-sectional study',
+            'case report',
+            'observational study',
+            'experimental study'
+        ]
+        scores = self.zero_shot_classify(
+            text,
+            design_labels,
+            hypothesis_template="This is a {}"
+        )
+        # Determine quality level
+        quality = 'unknown'
+        max_design = max(scores.items(), key=lambda x: x[1])[0] if scores else ''
+        for level, designs in self.study_designs.items():
+            if any(design in max_design.lower() for design in designs):
+                quality = level
+                break
+        return {
+            'design_scores': scores,
+            'primary_design': max_design,
+            'quality_level': quality
+        }
+    def evaluate_pain_surgery_relevance(self, text: str) -> Dict:
+        """Specifically evaluate pain and surgery relevance"""
+        # Pain-related evaluation
+        pain_terms = [
+            'chronic pain', 'acute pain', 'postoperative pain',
+            'pain management', 'analgesia', 'neuropathic pain',
+            'pain relief', 'pain control', 'pain assessment'
+        ]
+        pain_scores = self.zero_shot_classify(
+            text,
+            pain_terms,
+            hypothesis_template="This study involves {}"
+        )
+        # Surgery-related evaluation
+        surgery_terms = [
+            'surgical procedure', 'postoperative', 'perioperative',
+            'surgical intervention', 'operation', 'surgical outcomes',
+            'surgical complications', 'surgical technique'
+        ]
+        surgery_scores = self.zero_shot_classify(
+            text,
+            surgery_terms,
+            hypothesis_template="This study involves {}"
+        )
+        return {
+            'pain_relevance': max(pain_scores.values()) if pain_scores else 0,
+            'surgery_relevance': max(surgery_scores.values()) if surgery_scores else 0,
+            'pain_terms': pain_scores,
+            'surgery_terms': surgery_scores
+        }
+    def stage1_advanced_classification(self, title: str, abstract: str, criteria_text: str) -> Dict:
+        """Advanced Stage 1 classification using multiple NLP models"""
+        # Combine text
+        study_text = f"{title} {abstract}"
+        if len(study_text.strip()) < 20:
+            return {
+                'decision': 'UNCLEAR',
+                'confidence': 0,
+                'reasoning': 'Insufficient text for analysis',
+                'detailed_scores': {}
+            }
+        # Parse criteria with medical understanding
+        criteria = self.parse_advanced_criteria(criteria_text)
+        # Initialize scoring components
+        scores = {
+            'population': 0,
+            'intervention': 0,
+            'comparator': 0,
+            'outcomes': 0,
+            'study_design': 0,
+            'inclusion': 0,
+            'exclusion': 0,
+            'pain_relevance': 0,
+            'surgery_relevance': 0
+        }
+        reasoning_parts = []
+        # 1. Evaluate PICOS elements using cross-encoder
+        for element in ['population', 'intervention', 'comparator', 'outcomes']:
+            if criteria[element]:
+                element_scores = []
+                for criterion in criteria[element][:5]:  # Limit to top 5 to avoid overload
+                    score = self.cross_encoder_score(study_text, criterion)
+                    element_scores.append(score)
+                if element_scores:
+                    scores[element] = max(element_scores)
+                    if scores[element] > 0.5:
+                        best_match = criteria[element][element_scores.index(max(element_scores))]
+                        reasoning_parts.append(f"{element.capitalize()}: '{best_match}' ({scores[element]:.2f})")
+        # 2. Evaluate study design
+        design_eval = self.evaluate_study_design(study_text)
+        scores['study_design'] = max(design_eval['design_scores'].values()) if design_eval['design_scores'] else 0
+        if scores['study_design'] > 0.5:
+            reasoning_parts.append(f"Study Design: {design_eval['primary_design']} ({scores['study_design']:.2f})")
+        # 3. Evaluate pain and surgery relevance if applicable
+        if criteria['pain_related'] or 'pain' in criteria_text.lower():
+            pain_surgery_eval = self.evaluate_pain_surgery_relevance(study_text)
+            scores['pain_relevance'] = pain_surgery_eval['pain_relevance']
+            if scores['pain_relevance'] > 0.5:
+                reasoning_parts.append(f"Pain Relevance: {scores['pain_relevance']:.2f}")
+        if criteria['surgery_related'] or 'surgery' in criteria_text.lower():
+            pain_surgery_eval = self.evaluate_pain_surgery_relevance(study_text)
+            scores['surgery_relevance'] = pain_surgery_eval['surgery_relevance']
+            if scores['surgery_relevance'] > 0.5:
+                reasoning_parts.append(f"Surgery Relevance: {scores['surgery_relevance']:.2f}")
+        # 4. Evaluate inclusion criteria
+        if criteria['include_general']:
+            inclusion_scores = []
+            for criterion in criteria['include_general'][:3]:
+                score = self.cross_encoder_score(study_text, criterion)
+                inclusion_scores.append(score)
+            scores['inclusion'] = max(inclusion_scores) if inclusion_scores else 0
+            if scores['inclusion'] > 0.5:
+                reasoning_parts.append(f"Inclusion Match: {scores['inclusion']:.2f}")
+        # 5. Evaluate exclusion criteria
+        if criteria['exclude_general']:
+            exclusion_scores = []
+            for criterion in criteria['exclude_general'][:3]:
+                score = self.cross_encoder_score(study_text, criterion)
+                exclusion_scores.append(score)
+            scores['exclusion'] = max(exclusion_scores) if exclusion_scores else 0
+            if scores['exclusion'] > 0.6:
+                reasoning_parts.append(f"EXCLUSION Match: {scores['exclusion']:.2f}")
+        # 6. Check for low-quality study designs
+        if design_eval.get('quality_level') == 'low_quality':
+            scores['exclusion'] = max(scores['exclusion'], 0.7)
+            reasoning_parts.append(f"Low Quality Design: {design_eval['primary_design']}")
+        # Decision Logic with Confidence Calibration
+        decision, confidence = self._make_decision_stage1(scores, design_eval)
+        # Format reasoning
+        if not reasoning_parts:
+            reasoning_parts.append("No strong matches found")
+        reasoning = f"Stage 1 {decision}: {'; '.join(reasoning_parts)}"
+        return {
+            'decision': decision,
+            'confidence': confidence,
+            'reasoning': reasoning,
+            'detailed_scores': scores,
+            'study_design': design_eval.get('primary_design', 'Unknown'),
+            'quality_level': design_eval.get('quality_level', 'Unknown')
+        }
+    def _make_decision_stage1(self, scores: Dict, design_eval: Dict) -> Tuple[str, int]:
+        """Make Stage 1 decision based on scores with calibrated confidence"""
+        # Strong exclusion criteria
+        if scores['exclusion'] > 0.65:
+            confidence = min(int(scores['exclusion'] * 100), 90)
+            return 'EXCLUDE', confidence
+        # Low quality design exclusion
+        if design_eval.get('quality_level') == 'low_quality' and scores['study_design'] > 0.7:
+            return 'EXCLUDE', 75
+        # Calculate inclusion strength
+        picos_scores = [scores['population'], scores['intervention'], scores['outcomes']]
+        relevant_picos = sum(1 for s in picos_scores if s > 0.5)
+        avg_picos = np.mean([s for s in picos_scores if s > 0.3]) if any(s > 0.3 for s in picos_scores) else 0
+        # Strong inclusion - multiple PICOS matches
+        if relevant_picos >= 2 and avg_picos > 0.6:
+            confidence = min(int(avg_picos * 85), 85)
+            return 'INCLUDE', confidence
+        # Moderate inclusion - some relevant matches
+        if relevant_picos >= 1 or scores['inclusion'] > 0.6:
+            best_score = max(scores['population'], scores['intervention'], scores['outcomes'], scores['inclusion'])
+            confidence = min(int(best_score * 75), 75)
+            return 'INCLUDE', confidence
+        # Special consideration for pain/surgery studies
+        if (scores['pain_relevance'] > 0.6 or scores['surgery_relevance'] > 0.6) and \
+           design_eval.get('quality_level') in ['high_quality', 'moderate_quality']:
+            confidence = 70
+            return 'INCLUDE', confidence
+        # Weak matches - need manual review
+        if any(s > 0.4 for s in [scores['population'], scores['intervention'], scores['outcomes']]):
+            return 'UNCLEAR', 50
+        # No relevant matches
+        return 'EXCLUDE', 60
+# ============================================================================
+# GRADIO INTERFACE FUNCTIONS
+# ============================================================================
+# Initialize the screener globally
+screener = None
+def initialize_screener():
+    """Initialize the screener if not already done"""
+    global screener
+    if screener is None:
+        screener = AdvancedMedicalScreener()
+    return screener
+def process_stage1_advanced(file, title_col, abstract_col, criteria, sample_size):
+    """Process Stage 1 screening with advanced NLP models"""
+    try:
+        # Initialize screener
+        model = initialize_screener()
+        # Read CSV
+        df = pd.read_csv(file.name)
         if sample_size < len(df):
             df = df.head(sample_size)
         results = []
         for idx, row in df.iterrows():
             title = str(row[title_col]) if pd.notna(row[title_col]) else ""
             abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""
             if not title and not abstract:
                 continue
+            # Use advanced classification
+            classification = model.stage1_advanced_classification(title, abstract, criteria)
             result = {
                 'Study_ID': idx + 1,
                 'Title': title[:100] + "..." if len(title) > 100 else title,
+                'Stage1_Decision': classification['decision'],
+                'Stage1_Confidence': f"{classification['confidence']}%",
+                'Study_Design': classification.get('study_design', 'Unknown'),
+                'Quality_Level': classification.get('quality_level', 'Unknown'),
+                'Stage1_Reasoning': classification['reasoning'],
+                'Ready_for_Stage2': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
                 'Full_Title': title,
+                'Full_Abstract': abstract
             }
             results.append(result)
         results_df = pd.DataFrame(results)
+        # Generate summary
         total = len(results_df)
+        included = len(results_df[results_df['Stage1_Decision'] == 'INCLUDE'])
+        excluded = len(results_df[results_df['Stage1_Decision'] == 'EXCLUDE'])
+        unclear = len(results_df[results_df['Stage1_Decision'] == 'UNCLEAR'])
+        # Quality breakdown
+        quality_counts = results_df['Quality_Level'].value_counts().to_dict()
+        quality_summary = "\n".join([f"  - {level}: {count}" for level, count in quality_counts.items()])
         summary = f"""
+## 📊 Advanced Stage 1 Results (AI-Powered Medical Screening)
+**Screening Complete with Advanced NLP Models:**
+- **Total Studies Analyzed:** {total}
+- **✅ Include for Stage 2:** {included} ({included/total*100:.1f}%)
+- **❌ Exclude:** {excluded} ({excluded/total*100:.1f}%)
+- **⚠️ Needs Manual Review:** {unclear} ({unclear/total*100:.1f}%)
+**Study Quality Distribution:**
+{quality_summary}
+**Models Used:**
+- PubMedBERT for medical text understanding
+- Cross-encoder for semantic similarity
+- Zero-shot classification for criteria matching
+- Medical NER for entity extraction
+**Next Steps:**
+1. Review {unclear} studies marked as UNCLEAR
+2. Proceed to Stage 2 with {included} included studies
+3. Consider manual validation of borderline cases
         """
         return summary, results_df, results_df.to_csv(index=False)
     except Exception as e:
         return f"Error: {str(e)}", None, ""
+def create_advanced_interface():
+    """Create the Gradio interface with advanced NLP capabilities"""
+    with gr.Blocks(title="🔬 Advanced Medical Literature Screening", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
+        # 🔬 Advanced Medical Literature Screening with AI
+        **State-of-the-art NLP models for systematic review screening**
+        This tool uses advanced transformer models specifically trained on medical literature:
+        - **PubMedBERT**: Understands medical terminology and concepts
+        - **Cross-Encoders**: Accurate semantic matching for criteria
+        - **Zero-Shot Classification**: Flexible criteria evaluation
+        - **Medical NER**: Extracts medical entities automatically
+        Optimized for **pain**, **surgery**, and **study design** criteria, with general medical understanding.
         """)
         with gr.Tabs():
             # STAGE 1 TAB
+            with gr.TabItem("📋 Stage 1: Advanced Title/Abstract Screening"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         gr.Markdown("### 📁 Upload Study Data")
                             stage1_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
                             stage1_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)
+                        stage1_sample = gr.Slider(
+                            label="Studies to Process",
+                            minimum=5,
+                            maximum=500,
+                            value=100,
+                            step=5,
+                            info="Processing time increases with more studies"
+                        )
                     with gr.Column(scale=1):
+                        gr.Markdown("### 🎯 Inclusion/Exclusion Criteria")
                         stage1_criteria = gr.Textbox(
+                            label="Enter your criteria (understands medical terminology)",
                             value="""POPULATION:
+- Adult patients
+- Chronic pain patients
+- Surgical patients
 INTERVENTION:
+- Pain management interventions
+- Surgical procedures
+- Analgesic treatments
 OUTCOMES:
+- Pain intensity
+- Pain relief
+- Functional outcomes
+- Quality of life
 STUDY DESIGN:
 - Randomized controlled trials
+- Systematic reviews
 - Cohort studies
+- NOT case reports
 EXCLUDE:
 - Animal studies
+- Pediatric only
 - Case reports
+- Editorials""",
+                            lines=20,
+                            info="The AI understands medical synonyms and related terms"
                         )
+                with gr.Row():
+                    stage1_process_btn = gr.Button(
+                        "🚀 Start Advanced AI Screening",
+                        variant="primary",
+                        scale=2
+                    )
+                    gr.Markdown("*First run may take longer to load models*", scale=1)
                 stage1_results = gr.Markdown()
+                stage1_table = gr.Dataframe(
+                    label="Stage 1 Results with Quality Assessment",
+                    wrap=True
+                )
                 stage1_download_data = gr.Textbox(visible=False)
+                stage1_download_btn = gr.DownloadButton(
+                    label="💾 Download Stage 1 Results",
+                    visible=False
+                )
+            # HELP TAB
+            with gr.TabItem("❓ Help & Guidelines"):
                 gr.Markdown("""
+                ## 🤖 Advanced Features Explained
+                ### **Medical Understanding**
+                The system automatically:
+                - Recognizes medical synonyms (e.g., RCT = randomized controlled trial)
+                - Understands pain-related terms (nociception, analgesia, hyperalgesia)
+                - Identifies surgical concepts (perioperative, postoperative, resection)
+                - Evaluates study quality based on design
+                ### **How to Write Effective Criteria**
+                1. **Be specific but comprehensive:**
+                   - ✅ "chronic pain lasting > 3 months"
+                   - ✅ "postoperative pain management"
+                   - ❌ "pain" (too vague)
+                2. **Use medical terms freely:**
+                   - The AI understands medical terminology
+                   - It will automatically expand terms with synonyms
+                   - Example: "surgery" → surgical, operation, resection, etc.
+                3. **Specify study designs clearly:**
+                   - High quality: RCT, systematic review, meta-analysis
+                   - Moderate: cohort, case-control
+                   - Low: case reports, opinions
+                ### **Confidence Scores**
+                - **80-100%**: Strong match, high confidence
+                - **60-79%**: Good match, moderate confidence
+                - **40-59%**: Weak match, needs review
+                - **0-39%**: Poor match, likely exclude
+                ### **Tips for Best Results**
+                - Include both inclusion AND exclusion criteria
+                - Specify population, intervention, and outcomes
+                - Mention specific study designs to include/exclude
+                - The AI works best with complete abstracts
                 """)
+        # Event handlers
         def update_stage1_columns(file):
             if file is None:
                 return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
             except:
                 return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
+        stage1_file.change(
+            fn=update_stage1_columns,
+            inputs=[stage1_file],
+            outputs=[stage1_title_col, stage1_abstract_col]
+        )
+        def process_with_download(*args):
+            summary, table, csv_data = process_stage1_advanced(*args)
             return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))
         stage1_process_btn.click(
+            fn=process_with_download,
             inputs=[stage1_file, stage1_title_col, stage1_abstract_col, stage1_criteria, stage1_sample],
             outputs=[stage1_results, stage1_table, stage1_download_data, stage1_download_btn]
         )
+        stage1_download_btn.click(
+            lambda data: data,
+            inputs=[stage1_download_data],
+            outputs=[gr.File()]
         )
     return interface
 if __name__ == "__main__":
+    # Script entry point: announce startup, build the Gradio Blocks UI, then serve it
+    print("Starting Advanced Medical Literature Screening System...")
+    interface = create_advanced_interface()
+    interface.launch()

requirements.txt CHANGED Viewed

@@ -1,6 +1,21 @@
-gradio
-pandas
-transformers
-torch
-requests
-numpy

+# Core dependencies
+gradio>=3.40.0
+pandas>=1.3.0
+numpy>=1.21.0
+requests>=2.28.0
+# Deep Learning frameworks
+torch>=1.9.0
+transformers>=4.30.0
+# Advanced NLP models - REQUIRED for improved app
+sentence-transformers>=2.2.0
+# Optional but recommended for better performance
+accelerate>=0.20.0
+scipy>=1.7.0
+scikit-learn>=1.0.0
+# For data processing and utilities
+tqdm>=4.65.0
+tokenizers>=0.13.0