HebaElshimy committed on
Commit
fee9667
·
verified ·
1 Parent(s): f2eb705

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +580 -455
  2. requirements.txt +21 -6
app.py CHANGED
@@ -1,317 +1,532 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import requests
4
- import json
5
- from transformers import pipeline
6
  import torch
7
- import time
8
- from typing import List, Dict, Tuple
 
 
 
 
 
9
  import re
10
- import numpy as np
11
-
12
- # Initialize models
13
- print("Loading models...")
14
- classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
15
- similarity_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
16
- print("Models loaded successfully!")
17
-
18
- def cosine_similarity(a, b):
19
- """Simple cosine similarity calculation"""
20
- dot_product = np.dot(a, b)
21
- norm_a = np.linalg.norm(a)
22
- norm_b = np.linalg.norm(b)
23
- return dot_product / (norm_a * norm_b) if norm_a > 0 and norm_b > 0 else 0
24
-
25
- def get_text_embedding(text):
26
- """Get text embedding using the similarity model"""
27
- try:
28
- embeddings = similarity_model(text)
29
- return np.mean(embeddings[0], axis=0)
30
- except:
31
- return np.zeros(384)
32
 
33
- def parse_criteria(criteria_text: str, stage: str = "stage1") -> Dict:
34
- """Parse criteria differently for Stage 1 vs Stage 2"""
35
- criteria = {
36
- 'population': [], 'intervention': [], 'comparator': [], 'outcomes': [],
37
- 'study_design': [], 'include_general': [], 'exclude_general': []
38
- }
39
-
40
- lines = criteria_text.lower().split('\n')
41
- current_section = None
42
-
43
- for line in lines:
44
- line = line.strip()
45
- if not line:
46
- continue
47
-
48
- # Detect section headers
49
- if any(keyword in line for keyword in ['population:', 'participants:', 'subjects:']):
50
- current_section = 'population'
51
- elif any(keyword in line for keyword in ['intervention:', 'exposure:', 'treatment:']):
52
- current_section = 'intervention'
53
- elif any(keyword in line for keyword in ['comparator:', 'control:', 'comparison:']):
54
- current_section = 'comparator'
55
- elif any(keyword in line for keyword in ['outcomes:', 'endpoint:', 'results:']):
56
- current_section = 'outcomes'
57
- elif any(keyword in line for keyword in ['study design:', 'design:', 'study type:']):
58
- current_section = 'study_design'
59
- elif 'include' in line and ':' in line:
60
- current_section = 'include_general'
61
- elif 'exclude' in line and ':' in line:
62
- current_section = 'exclude_general'
63
- elif line.startswith('-') and current_section:
64
- term = line[1:].strip()
65
- if term and len(term) > 2:
66
- criteria[current_section].append(term)
67
- elif current_section and not any(keyword in line for keyword in ['include', 'exclude', 'population', 'intervention', 'comparator', 'outcomes', 'study']):
68
- terms = [t.strip() for t in line.split(',') if t.strip() and len(t.strip()) > 2]
69
- criteria[current_section].extend(terms)
70
-
71
- return criteria
72
 
73
- def semantic_similarity_score(study_text: str, criteria_terms: List[str]) -> Tuple[float, str]:
74
- """Calculate semantic similarity between study and criteria terms"""
75
- if not criteria_terms:
76
- return 0.0, ""
77
-
78
- study_embedding = get_text_embedding(study_text)
79
- best_score, best_match = 0.0, ""
80
-
81
- for term in criteria_terms:
82
- term_embedding = get_text_embedding(term)
83
- similarity = cosine_similarity(study_embedding, term_embedding)
84
- if similarity > best_score:
85
- best_score, best_match = similarity, term
86
-
87
- return best_score, best_match
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- def stage1_classification(title: str, abstract: str, criteria_text: str) -> Dict:
90
- """Stage 1: Broad title/abstract screening with high sensitivity"""
91
-
92
- study_text = f"{title} {abstract}".lower()
93
- if len(study_text.strip()) < 20:
94
- return {'decision': 'UNCLEAR', 'confidence': 20, 'reasoning': 'Insufficient text', 'stage': 1}
95
-
96
- criteria = parse_criteria(criteria_text, "stage1")
97
-
98
- # Calculate PICOS scores with lower thresholds for Stage 1
99
- pop_score, pop_match = semantic_similarity_score(study_text, criteria['population'])
100
- int_score, int_match = semantic_similarity_score(study_text, criteria['intervention'])
101
- out_score, out_match = semantic_similarity_score(study_text, criteria['outcomes'])
102
- design_score, design_match = semantic_similarity_score(study_text, criteria['study_design'])
103
- inc_score, inc_match = semantic_similarity_score(study_text, criteria['include_general'])
104
- exc_score, exc_match = semantic_similarity_score(study_text, criteria['exclude_general'])
105
-
106
- reasoning_parts = []
107
- if pop_score > 0.25: reasoning_parts.append(f"Population: '{pop_match}' ({pop_score:.2f})")
108
- if int_score > 0.25: reasoning_parts.append(f"Intervention: '{int_match}' ({int_score:.2f})")
109
- if out_score > 0.25: reasoning_parts.append(f"Outcome: '{out_match}' ({out_score:.2f})")
110
- if design_score > 0.25: reasoning_parts.append(f"Design: '{design_match}' ({design_score:.2f})")
111
- if inc_score > 0.25: reasoning_parts.append(f"Include: '{inc_match}' ({inc_score:.2f})")
112
- if exc_score > 0.25: reasoning_parts.append(f"Exclude: '{exc_match}' ({exc_score:.2f})")
113
-
114
- # Stage 1 Decision Logic (High Sensitivity)
115
- if exc_score > 0.35: # Clear exclusion
116
- decision, confidence = 'EXCLUDE', min(int(exc_score * 100), 90)
117
- reasoning = f"Stage 1 EXCLUDE: {'; '.join(reasoning_parts)}"
118
- elif sum([pop_score > 0.25, int_score > 0.25, out_score > 0.25]) >= 1: # Any relevant match
119
- avg_score = np.mean([s for s in [pop_score, int_score, out_score, design_score, inc_score] if s > 0.25])
120
- decision, confidence = 'INCLUDE', min(int(avg_score * 75), 80)
121
- reasoning = f"Stage 1 INCLUDE: {'; '.join(reasoning_parts)}"
122
- else:
123
- decision, confidence = 'UNCLEAR', 40
124
- reasoning = f"Stage 1 UNCLEAR: {'; '.join(reasoning_parts) if reasoning_parts else 'No clear matches'}"
125
-
126
- return {'decision': decision, 'confidence': confidence, 'reasoning': reasoning, 'stage': 1}
127
 
128
- def stage2_classification(title: str, abstract: str, full_text: str, criteria_text: str,
129
- data_extraction_fields: Dict = None) -> Dict:
130
- """Stage 2: Detailed full-text screening with data extraction"""
131
-
132
- # Combine all available text
133
- study_text = f"{title} {abstract} {full_text}".lower()
134
-
135
- if len(study_text.strip()) < 50:
136
- return {'decision': 'UNCLEAR', 'confidence': 25, 'reasoning': 'Insufficient full text', 'stage': 2}
137
-
138
- criteria = parse_criteria(criteria_text, "stage2")
139
-
140
- # More stringent scoring for Stage 2
141
- pop_score, pop_match = semantic_similarity_score(study_text, criteria['population'])
142
- int_score, int_match = semantic_similarity_score(study_text, criteria['intervention'])
143
- comp_score, comp_match = semantic_similarity_score(study_text, criteria['comparator'])
144
- out_score, out_match = semantic_similarity_score(study_text, criteria['outcomes'])
145
- design_score, design_match = semantic_similarity_score(study_text, criteria['study_design'])
146
- exc_score, exc_match = semantic_similarity_score(study_text, criteria['exclude_general'])
147
-
148
- # Data extraction scoring
149
- extraction_scores = {}
150
- if data_extraction_fields:
151
- for field, terms in data_extraction_fields.items():
152
- if terms:
153
- field_score, field_match = semantic_similarity_score(study_text, terms)
154
- extraction_scores[field] = {'score': field_score, 'match': field_match}
155
-
156
- reasoning_parts = []
157
- if pop_score > 0.3: reasoning_parts.append(f"Population: '{pop_match}' ({pop_score:.2f})")
158
- if int_score > 0.3: reasoning_parts.append(f"Intervention: '{int_match}' ({int_score:.2f})")
159
- if comp_score > 0.3: reasoning_parts.append(f"Comparator: '{comp_match}' ({comp_score:.2f})")
160
- if out_score > 0.3: reasoning_parts.append(f"Outcome: '{out_match}' ({out_score:.2f})")
161
- if design_score > 0.3: reasoning_parts.append(f"Design: '{design_match}' ({design_score:.2f})")
162
- if exc_score > 0.3: reasoning_parts.append(f"Exclusion: '{exc_match}' ({exc_score:.2f})")
163
-
164
- # Stage 2 Decision Logic (High Specificity)
165
- if exc_score > 0.4: # Strong exclusion
166
- decision, confidence = 'EXCLUDE', min(int(exc_score * 100), 95)
167
- reasoning = f"Stage 2 EXCLUDE: {'; '.join(reasoning_parts)}"
168
- elif sum([pop_score > 0.4, int_score > 0.4, out_score > 0.4, design_score > 0.4]) >= 3: # Multiple strong matches
169
- avg_score = np.mean([pop_score, int_score, comp_score, out_score, design_score])
170
- decision, confidence = 'INCLUDE', min(int(avg_score * 85), 92)
171
- reasoning = f"Stage 2 INCLUDE: {'; '.join(reasoning_parts)}"
172
- elif max(pop_score, int_score, out_score) > 0.5: # Very strong single match
173
- decision, confidence = 'INCLUDE', min(int(max(pop_score, int_score, out_score) * 80), 88)
174
- reasoning = f"Stage 2 INCLUDE: {'; '.join(reasoning_parts)}"
175
- else:
176
- decision, confidence = 'EXCLUDE', 60
177
- reasoning = f"Stage 2 EXCLUDE: Insufficient criteria match. {'; '.join(reasoning_parts)}"
178
-
179
- result = {
180
- 'decision': decision,
181
- 'confidence': confidence,
182
- 'reasoning': reasoning,
183
- 'stage': 2,
184
- 'extraction_data': extraction_scores
185
- }
186
-
187
- return result
188
 
189
- def process_stage1(file, title_col, abstract_col, criteria, sample_size):
190
- """Process Stage 1 screening"""
191
- try:
192
- df = pd.read_csv(file.name)
193
- if sample_size < len(df):
194
- df = df.head(sample_size)
 
 
 
 
 
 
 
195
 
196
- results = []
197
- for idx, row in df.iterrows():
198
- title = str(row[title_col]) if pd.notna(row[title_col]) else ""
199
- abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""
 
 
 
200
 
201
- if not title and not abstract:
202
  continue
203
 
204
- classification = stage1_classification(title, abstract, criteria)
 
 
 
 
 
 
205
 
206
- result = {
207
- 'Study_ID': idx + 1,
208
- 'Title': title[:100] + "..." if len(title) > 100 else title,
209
- 'Stage1_Decision': classification['decision'],
210
- 'Stage1_Confidence': f"{classification['confidence']}%",
211
- 'Stage1_Reasoning': classification['reasoning'],
212
- 'Ready_for_Stage2': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
213
- 'Full_Title': title,
214
- 'Full_Abstract': abstract
215
- }
216
- results.append(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
- results_df = pd.DataFrame(results)
 
 
219
 
220
- # Summary for Stage 1
221
- total = len(results_df)
222
- included = len(results_df[results_df['Stage1_Decision'] == 'INCLUDE'])
223
- excluded = len(results_df[results_df['Stage1_Decision'] == 'EXCLUDE'])
224
- unclear = len(results_df[results_df['Stage1_Decision'] == 'UNCLEAR'])
 
 
 
 
 
 
 
 
 
 
225
 
226
- summary = f"""
227
- ## 📊 Stage 1 (Title/Abstract) Results
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
- **Screening Complete:**
230
- - **Total Studies:** {total}
231
- - **Include for Stage 2:** {included} ({included/total*100:.1f}%)
232
- - **Exclude:** {excluded} ({excluded/total*100:.1f}%)
233
- - **Needs Manual Review:** {unclear} ({unclear/total*100:.1f}%)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
- **Next Steps:**
236
- 1. Review {unclear} studies marked as UNCLEAR
237
- 2. Proceed to Stage 2 with {included} included studies
238
- 3. Obtain full texts for Stage 2 screening
239
- """
 
 
 
240
 
241
- return summary, results_df, results_df.to_csv(index=False)
 
 
 
 
242
 
243
- except Exception as e:
244
- return f"Error: {str(e)}", None, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
- def process_stage2(file, title_col, abstract_col, fulltext_col, criteria, extraction_fields, sample_size):
247
- """Process Stage 2 screening with data extraction"""
248
- try:
249
- df = pd.read_csv(file.name)
 
 
 
 
 
 
 
 
 
 
 
250
 
251
- # Filter to only Stage 1 included studies if column exists
252
- if 'Stage1_Decision' in df.columns:
253
- df = df[df['Stage1_Decision'] == 'INCLUDE']
 
 
 
 
 
 
 
 
 
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  if sample_size < len(df):
256
  df = df.head(sample_size)
257
 
258
- # Parse extraction fields
259
- extraction_dict = {}
260
- if extraction_fields:
261
- for line in extraction_fields.split('\n'):
262
- if ':' in line:
263
- field, terms = line.split(':', 1)
264
- extraction_dict[field.strip()] = [t.strip() for t in terms.split(',') if t.strip()]
265
-
266
  results = []
267
  for idx, row in df.iterrows():
268
  title = str(row[title_col]) if pd.notna(row[title_col]) else ""
269
  abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""
270
- full_text = str(row[fulltext_col]) if fulltext_col and fulltext_col in df.columns and pd.notna(row[fulltext_col]) else ""
271
 
272
  if not title and not abstract:
273
  continue
274
 
275
- classification = stage2_classification(title, abstract, full_text, criteria, extraction_dict)
 
276
 
277
  result = {
278
  'Study_ID': idx + 1,
279
  'Title': title[:100] + "..." if len(title) > 100 else title,
280
- 'Stage2_Decision': classification['decision'],
281
- 'Stage2_Confidence': f"{classification['confidence']}%",
282
- 'Stage2_Reasoning': classification['reasoning'],
283
- 'Final_Include': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
284
- 'Extraction_Data': str(classification.get('extraction_data', {})),
 
285
  'Full_Title': title,
286
- 'Full_Abstract': abstract,
287
- 'Full_Text': full_text
288
  }
289
  results.append(result)
290
 
291
  results_df = pd.DataFrame(results)
292
 
293
- # Summary for Stage 2
294
  total = len(results_df)
295
- final_included = len(results_df[results_df['Stage2_Decision'] == 'INCLUDE'])
296
- final_excluded = len(results_df[results_df['Stage2_Decision'] == 'EXCLUDE'])
 
 
 
 
 
297
 
298
  summary = f"""
299
- ## 📊 Stage 2 (Full-Text) Results
 
 
 
 
 
 
300
 
301
- **Detailed Screening Complete:**
302
- - **Studies Reviewed:** {total}
303
- - **Final INCLUDE:** {final_included} ({final_included/total*100:.1f}%)
304
- - **Final EXCLUDE:** {final_excluded} ({final_excluded/total*100:.1f}%)
305
 
306
- **Ready for Next Steps:**
307
- - **Data Extraction:** {final_included} studies
308
- - **Quality Assessment:** {final_included} studies
309
- - **Evidence Synthesis:** Ready to proceed
 
310
 
311
- **Recommended Actions:**
312
- 1. Export {final_included} included studies for detailed data extraction
313
- 2. Conduct quality assessment (ROB2, ROBINS-I, etc.)
314
- 3. Begin evidence synthesis and meta-analysis planning
315
  """
316
 
317
  return summary, results_df, results_df.to_csv(index=False)
@@ -319,23 +534,28 @@ def process_stage2(file, title_col, abstract_col, fulltext_col, criteria, extrac
319
  except Exception as e:
320
  return f"Error: {str(e)}", None, ""
321
 
322
- def create_interface():
323
- with gr.Blocks(title="🔬 2-Stage Systematic Review AI Assistant", theme=gr.themes.Soft()) as interface:
 
324
 
325
  gr.Markdown("""
326
- # 🔬 2-Stage Systematic Review AI Assistant
327
 
328
- **Complete workflow for evidence-based systematic reviews**
329
 
330
- This tool supports the full 2-stage systematic review process:
331
- - **Stage 1:** Title/Abstract screening (high sensitivity)
332
- - **Stage 2:** Full-text screening with data extraction (high specificity)
 
 
 
 
333
  """)
334
 
335
  with gr.Tabs():
336
 
337
  # STAGE 1 TAB
338
- with gr.TabItem("📋 Stage 1: Title/Abstract Screening"):
339
  with gr.Row():
340
  with gr.Column(scale=1):
341
  gr.Markdown("### 📁 Upload Study Data")
@@ -350,189 +570,113 @@ def create_interface():
350
  stage1_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
351
  stage1_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)
352
 
353
- stage1_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=500, value=100, step=5)
 
 
 
 
 
 
 
354
 
355
  with gr.Column(scale=1):
356
- gr.Markdown("### 🎯 Stage 1 Criteria (Broad/Sensitive)")
357
 
358
  stage1_criteria = gr.Textbox(
359
- label="Inclusion/Exclusion Criteria for Stage 1",
360
  value="""POPULATION:
361
- - Adult participants
362
- - Human studies
 
363
 
364
  INTERVENTION:
365
- - [Your intervention/exposure of interest]
 
 
366
 
367
  OUTCOMES:
368
- - [Primary outcomes of interest]
 
 
 
369
 
370
  STUDY DESIGN:
371
  - Randomized controlled trials
 
372
  - Cohort studies
373
- - Case-control studies
374
 
375
  EXCLUDE:
376
  - Animal studies
 
377
  - Case reports
378
- - Reviews (unless relevant)""",
379
- lines=15
 
380
  )
381
 
382
- stage1_process_btn = gr.Button("🚀 Start Stage 1 Screening", variant="primary")
 
 
 
 
 
 
383
 
384
  stage1_results = gr.Markdown()
385
- stage1_table = gr.Dataframe(label="Stage 1 Results")
 
 
 
386
  stage1_download_data = gr.Textbox(visible=False)
387
- stage1_download_btn = gr.DownloadButton(label="💾 Download Stage 1 Results", visible=False)
388
-
389
- # STAGE 2 TAB
390
- with gr.TabItem("📄 Stage 2: Full-Text Screening"):
391
- with gr.Row():
392
- with gr.Column(scale=1):
393
- gr.Markdown("### 📁 Upload Stage 1 Results or Full-Text Data")
394
-
395
- stage2_file = gr.File(
396
- label="Upload Stage 1 Results or Studies with Full Text",
397
- file_types=[".csv"],
398
- type="filepath"
399
- )
400
-
401
- with gr.Row():
402
- stage2_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
403
- stage2_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)
404
-
405
- stage2_fulltext_col = gr.Dropdown(label="Full Text Column", choices=[], interactive=True)
406
- stage2_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=200, value=50, step=5)
407
-
408
- with gr.Column(scale=1):
409
- gr.Markdown("### 🎯 Stage 2 Criteria (Strict/Specific)")
410
-
411
- stage2_criteria = gr.Textbox(
412
- label="Detailed Inclusion/Exclusion Criteria for Stage 2",
413
- value="""POPULATION:
414
- - [Specific population criteria]
415
- - [Age ranges, conditions, etc.]
416
-
417
- INTERVENTION:
418
- - [Detailed intervention specifications]
419
- - [Dosage, duration, delivery method]
420
-
421
- COMPARATOR:
422
- - [Control group specifications]
423
- - [Placebo, standard care, etc.]
424
-
425
- OUTCOMES:
426
- - [Primary endpoint definitions]
427
- - [Secondary outcomes]
428
- - [Measurement methods]
429
-
430
- STUDY DESIGN:
431
- - [Minimum study quality requirements]
432
- - [Follow-up duration requirements]
433
-
434
- EXCLUDE:
435
- - [Specific exclusion criteria]
436
- - [Study quality thresholds]""",
437
- lines=15
438
- )
439
-
440
- extraction_fields = gr.Textbox(
441
- label="Data Extraction Fields (Optional)",
442
- value="""Sample Size: participants, subjects, patients, n=
443
- Intervention Duration: weeks, months, days, duration
444
- Primary Outcome: endpoint, primary outcome, main outcome
445
- Statistical Method: analysis, statistical, regression, model
446
- Risk of Bias: randomization, blinding, allocation""",
447
- lines=8
448
- )
449
-
450
- stage2_process_btn = gr.Button("🔍 Start Stage 2 Screening", variant="primary")
451
-
452
- stage2_results = gr.Markdown()
453
- stage2_table = gr.Dataframe(label="Stage 2 Results with Data Extraction")
454
- stage2_download_data = gr.Textbox(visible=False)
455
- stage2_download_btn = gr.DownloadButton(label="💾 Download Final Results", visible=False)
456
 
457
- # WORKFLOW GUIDANCE TAB
458
- with gr.TabItem("📚 Systematic Review Workflow"):
459
  gr.Markdown("""
460
- ## 🔄 Complete 2-Stage Systematic Review Process
461
-
462
- ### **Stage 1: Title/Abstract Screening**
463
- **Objective:** High sensitivity screening to identify potentially relevant studies
464
-
465
- **Process:**
466
- 1. Upload search results from multiple databases (PubMed, Embase, etc.)
467
- 2. Define broad inclusion/exclusion criteria
468
- 3. AI screens titles/abstracts with high sensitivity
469
- 4. Manually review "UNCLEAR" classifications
470
- 5. Export studies marked for inclusion to Stage 2
471
-
472
- **Criteria Guidelines:**
473
- - Use broad terms to capture all potentially relevant studies
474
- - Focus on key PICOS elements (Population, Intervention, Outcomes)
475
- - Err on the side of inclusion when uncertain
476
-
477
- ### **Stage 2: Full-Text Screening**
478
- **Objective:** High specificity screening with detailed data extraction
479
 
480
- **Process:**
481
- 1. Upload Stage 1 results or add full-text content
482
- 2. Define strict, specific inclusion/exclusion criteria
483
- 3. AI performs detailed full-text analysis
484
- 4. Extract key data points for synthesis
485
- 5. Export final included studies for meta-analysis
486
 
487
- **Criteria Guidelines:**
488
- - Use specific, measurable criteria
489
- - Include detailed PICOS specifications
490
- - Define minimum quality thresholds
491
- - Specify exact outcome measurements needed
492
 
493
- ### **Quality Assurance Recommendations:**
 
 
 
494
 
495
- **For Stage 1:**
496
- - Manual review of 10-20% of AI decisions
497
- - Inter-rater reliability testing with subset
498
- - Calibration exercises among reviewers
499
 
500
- **For Stage 2:**
501
- - Manual validation of all AI INCLUDE decisions
502
- - Detailed reason documentation for exclusions
503
- - Data extraction verification by second reviewer
504
 
505
- ### **After 2-Stage Screening:**
 
 
 
 
506
 
507
- 1. **Data Extraction:** Extract detailed study characteristics
508
- 2. **Quality Assessment:** Apply ROB2, ROBINS-I, or other tools
509
- 3. **Evidence Synthesis:** Qualitative synthesis and meta-analysis
510
- 4. **GRADE Assessment:** Evaluate certainty of evidence
511
- 5. **Reporting:** Follow PRISMA guidelines
512
-
513
- ### **Best Practices:**
514
-
515
- - **Document everything:** Keep detailed logs of decisions and criteria
516
- - **Validate AI decisions:** Use AI as assistance, not replacement
517
- - **Follow guidelines:** Adhere to Cochrane and PRISMA standards
518
- - **Test criteria:** Pilot with known studies before full screening
519
- - **Multiple reviewers:** Have disagreements resolved by third reviewer
520
-
521
- ### **When to Use Each Stage:**
522
-
523
- **Use Stage 1 when:**
524
- - Starting with large search results (>1000 studies)
525
- - Need to quickly filter irrelevant studies
526
- - Working with title/abstract data only
527
-
528
- **Use Stage 2 when:**
529
- - Have full-text access to studies
530
- - Need detailed inclusion/exclusion assessment
531
- - Ready for data extraction
532
- - Preparing for meta-analysis
533
  """)
534
 
535
- # Event handlers for file uploads and column detection
536
  def update_stage1_columns(file):
537
  if file is None:
538
  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
@@ -545,50 +689,31 @@ Risk of Bias: randomization, blinding, allocation""",
545
  except:
546
  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
547
 
548
- def update_stage2_columns(file):
549
- if file is None:
550
- return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
551
- try:
552
- df = pd.read_csv(file.name)
553
- columns = df.columns.tolist()
554
- title_col = next((col for col in columns if 'title' in col.lower()), columns[0] if columns else None)
555
- abstract_col = next((col for col in columns if 'abstract' in col.lower()), columns[1] if len(columns) > 1 else None)
556
- fulltext_col = next((col for col in columns if any(term in col.lower() for term in ['full_text', 'fulltext', 'text', 'content'])), None)
557
- return (gr.Dropdown(choices=columns, value=title_col),
558
- gr.Dropdown(choices=columns, value=abstract_col),
559
- gr.Dropdown(choices=columns, value=fulltext_col))
560
- except:
561
- return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
562
-
563
- # Event bindings
564
- stage1_file.change(fn=update_stage1_columns, inputs=[stage1_file], outputs=[stage1_title_col, stage1_abstract_col])
565
- stage2_file.change(fn=update_stage2_columns, inputs=[stage2_file], outputs=[stage2_title_col, stage2_abstract_col, stage2_fulltext_col])
566
-
567
- def process_stage1_with_download(*args):
568
- summary, table, csv_data = process_stage1(*args)
569
- return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))
570
 
571
- def process_stage2_with_download(*args):
572
- summary, table, csv_data = process_stage2(*args)
573
  return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))
574
 
575
  stage1_process_btn.click(
576
- fn=process_stage1_with_download,
577
  inputs=[stage1_file, stage1_title_col, stage1_abstract_col, stage1_criteria, stage1_sample],
578
  outputs=[stage1_results, stage1_table, stage1_download_data, stage1_download_btn]
579
  )
580
 
581
- stage2_process_btn.click(
582
- fn=process_stage2_with_download,
583
- inputs=[stage2_file, stage2_title_col, stage2_abstract_col, stage2_fulltext_col, stage2_criteria, extraction_fields, stage2_sample],
584
- outputs=[stage2_results, stage2_table, stage2_download_data, stage2_download_btn]
585
  )
586
-
587
- stage1_download_btn.click(lambda data: data, inputs=[stage1_download_data], outputs=[gr.File()])
588
- stage2_download_btn.click(lambda data: data, inputs=[stage2_download_data], outputs=[gr.File()])
589
 
590
  return interface
591
 
592
  if __name__ == "__main__":
593
- interface = create_interface()
594
- interface.launch()
 
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import numpy as np
 
 
4
  import torch
5
+ from transformers import (
6
+ pipeline,
7
+ AutoTokenizer,
8
+ AutoModel,
9
+ AutoModelForSequenceClassification
10
+ )
11
+ from sentence_transformers import SentenceTransformer, CrossEncoder
12
  import re
13
+ from typing import List, Dict, Tuple, Optional
14
+ import warnings
15
+ warnings.filterwarnings('ignore')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # ============================================================================
18
+ # ADVANCED MODEL INITIALIZATION
19
+ # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ class AdvancedMedicalScreener:
22
+ def __init__(self):
23
+ """Initialize all advanced NLP models for medical literature screening"""
24
+ print("🚀 Initializing Advanced Medical Screening Models...")
25
+
26
+ # 1. Biomedical language model for embeddings
27
+ print("Loading PubMedBERT for medical text understanding...")
28
+ self.pubmed_tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
29
+ self.pubmed_model = AutoModel.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
30
+
31
+ # 2. Cross-encoder for accurate semantic similarity
32
+ print("Loading Cross-Encoder for semantic matching...")
33
+ self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', max_length=512)
34
+
35
+ # 3. Zero-shot classifier for criteria matching
36
+ print("Loading Zero-Shot Classifier...")
37
+ self.zero_shot = pipeline(
38
+ "zero-shot-classification",
39
+ model="facebook/bart-large-mnli",
40
+ device=0 if torch.cuda.is_available() else -1
41
+ )
42
+
43
+ # 4. Sentence transformer for fast similarity
44
+ print("Loading Sentence Transformer...")
45
+ self.sentence_model = SentenceTransformer('pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb')
46
+
47
+ # 5. Medical NER for entity extraction (optional, lightweight)
48
+ print("Loading Medical NER model...")
49
+ try:
50
+ self.ner_pipeline = pipeline(
51
+ "ner",
52
+ model="dmis-lab/biobert-base-cased-v1.2",
53
+ aggregation_strategy="simple"
54
+ )
55
+ except:
56
+ self.ner_pipeline = None
57
+ print("Note: Medical NER model not available, using fallback")
58
+
59
+ print("✅ All models loaded successfully!")
60
+
61
+ # Medical terminology expansions
62
+ self.medical_synonyms = {
63
+ 'rct': ['randomized controlled trial', 'randomised controlled trial', 'randomized clinical trial'],
64
+ 'pain': ['pain', 'nociception', 'analgesia', 'hyperalgesia', 'allodynia', 'neuropathic pain',
65
+ 'chronic pain', 'acute pain', 'postoperative pain', 'pain management'],
66
+ 'surgery': ['surgery', 'surgical', 'operation', 'operative', 'postoperative', 'perioperative',
67
+ 'preoperative', 'surgical procedure', 'surgical intervention'],
68
+ 'study design': ['study design', 'trial design', 'research design', 'methodology',
69
+ 'randomized', 'controlled', 'cohort', 'case-control', 'cross-sectional',
70
+ 'prospective', 'retrospective', 'observational', 'experimental'],
71
+ 'systematic review': ['systematic review', 'meta-analysis', 'meta analysis', 'evidence synthesis'],
72
+ 'case report': ['case report', 'case study', 'case series', 'case presentation'],
73
+ 'clinical trial': ['clinical trial', 'clinical study', 'trial', 'intervention study'],
74
+ }
75
+
76
+ # Study design hierarchy for classification
77
+ self.study_designs = {
78
+ 'high_quality': ['randomized controlled trial', 'systematic review', 'meta-analysis'],
79
+ 'moderate_quality': ['cohort study', 'case-control study', 'controlled trial'],
80
+ 'low_quality': ['case report', 'case series', 'opinion', 'editorial'],
81
+ 'observational': ['cohort', 'case-control', 'cross-sectional', 'observational'],
82
+ 'experimental': ['randomized', 'experimental', 'intervention', 'trial']
83
+ }
84
 
85
+ def get_pubmed_embedding(self, text: str) -> np.ndarray:
86
+ """Get PubMedBERT embedding for medical text"""
87
+ inputs = self.pubmed_tokenizer(
88
+ text,
89
+ return_tensors="pt",
90
+ truncation=True,
91
+ max_length=512,
92
+ padding=True
93
+ )
94
+
95
+ with torch.no_grad():
96
+ outputs = self.pubmed_model(**inputs)
97
+ # Use CLS token embedding
98
+ embedding = outputs.last_hidden_state[:, 0, :].numpy()
99
+
100
+ return embedding.squeeze()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ def expand_medical_terms(self, term: str) -> List[str]:
103
+ """Expand medical terms with synonyms and related concepts"""
104
+ term_lower = term.lower()
105
+ expanded = [term]
106
+
107
+ # Check for known medical synonyms
108
+ for key, synonyms in self.medical_synonyms.items():
109
+ if key in term_lower or any(syn in term_lower for syn in synonyms):
110
+ expanded.extend(synonyms)
111
+
112
+ # Add variations
113
+ if 'pain' in term_lower:
114
+ expanded.extend(['analgesic', 'nociceptive', 'painful'])
115
+ if 'surgery' in term_lower or 'surgical' in term_lower:
116
+ expanded.extend(['surgeon', 'resection', 'excision', 'incision'])
117
+
118
+ return list(set(expanded))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
def parse_advanced_criteria(self, criteria_text: str) -> Dict:
    """Parse free-text inclusion/exclusion criteria into named sections.

    The text is read line by line. A non-bulleted line that names a section
    (``EXCLUDE``, ``INCLUDE``, ``POPULATION:``, ``OUTCOMES:`` ...) switches
    the current section; bulleted lines, comma-separated lines and plain
    single-item lines are filed under the current section after being
    expanded with ``self.expand_medical_terms``.

    Bulleted lines are always treated as items, never as headers, so an item
    such as ``- Chronic pain patients`` stays in the section it was written
    under instead of silently switching to the pain section.

    Args:
        criteria_text: Criteria as typed by the user, one item per line.

    Returns:
        Dict mapping each section name to a de-duplicated list of phrases in
        first-seen order (deterministic, so truncating callers are stable).
    """
    criteria = {
        'population': [],
        'intervention': [],
        'comparator': [],
        'outcomes': [],
        'study_design': [],
        'include_general': [],
        'exclude_general': [],
        'pain_related': [],
        'surgery_related': []
    }

    bullets = ('-', '•', '*', '·')

    # Header stems per PICOS section; both "stem:" and "stems:" are accepted
    # so that "OUTCOMES:" or "INTERVENTIONS:" are recognised as headers.
    picos_headers = (
        ('population', ('population', 'participant', 'patient')),
        ('intervention', ('intervention', 'exposure', 'treatment')),
        ('comparator', ('comparator', 'control', 'comparison')),
        ('outcomes', ('outcome', 'endpoint', 'measure')),
        ('study_design', ('study design', 'design', 'study type', 'methodology')),
    )

    def detect_header(line_lower):
        """Return the section a non-bulleted line introduces, or None."""
        if 'exclude' in line_lower:
            return 'exclude_general'
        if 'include' in line_lower:
            return 'include_general'
        for section, stems in picos_headers:
            if any(f'{stem}:' in line_lower or f'{stem}s:' in line_lower for stem in stems):
                return section
        if 'pain' in line_lower:
            return 'pain_related'
        if any(term in line_lower for term in ('surgery', 'surgical', 'operation')):
            return 'surgery_related'
        return None

    current_section = None

    for line in criteria_text.split('\n'):
        line_clean = line.strip()
        if not line_clean:
            continue
        line_lower = line_clean.lower()

        # Bulleted lines are items of the current section.
        if line_clean.startswith(bullets):
            item = line_clean[1:].strip()
            if current_section and item:
                criteria[current_section].extend(self.expand_medical_terms(item))
            continue

        header = detect_header(line_lower)
        if header:
            current_section = header
            continue

        if not current_section:
            continue

        if ',' in line_clean and ':' not in line_clean:
            # Comma-separated items on one line; very short fragments are ignored.
            for item in (part.strip() for part in line_clean.split(',')):
                if item and len(item) > 2:
                    criteria[current_section].extend(self.expand_medical_terms(item))
        elif not any(marker in line_lower for marker in (':', 'population', 'intervention', 'outcome')):
            # Plain single-item line.
            criteria[current_section].extend(self.expand_medical_terms(line_clean))

    # Remove duplicates while preserving first-seen order.
    for key in criteria:
        criteria[key] = list(dict.fromkeys(criteria[key]))

    return criteria
197
+
198
def cross_encoder_score(self, text: str, criteria: str) -> float:
    """Score how well ``text`` matches ``criteria`` with the cross-encoder.

    The raw score returned by ``self.cross_encoder.predict`` is passed
    through a logistic sigmoid so the result lies between 0 and 1.

    Args:
        text: Study text (title plus abstract).
        criteria: A single criterion phrase.

    Returns:
        Similarity as a float; ``0.0`` when the model call fails. The
        failure is logged rather than silently discarded.
    """
    import logging

    try:
        score = self.cross_encoder.predict([[text, criteria]])
        # Sigmoid: maps the unbounded model output onto the 0-1 range.
        return float(1 / (1 + np.exp(-score[0])))
    except Exception:
        # Best-effort scoring: one failed pair must not abort a whole
        # screening run, but KeyboardInterrupt/SystemExit still propagate.
        logging.getLogger(__name__).warning(
            "Cross-encoder scoring failed; returning 0.0", exc_info=True
        )
        return 0.0
206
+
207
def zero_shot_classify(self, text: str, labels: List[str], hypothesis_template: str = "This study is about {}") -> Dict:
    """Run multi-label zero-shot classification of ``text`` against ``labels``.

    Args:
        text: Text to classify.
        labels: Candidate labels; an empty list short-circuits to ``{}``.
        hypothesis_template: Template passed to the classifier, with ``{}``
            standing for the label.

    Returns:
        Dict mapping each returned label to its score, or ``{}`` when there
        are no labels or the classifier call fails (the failure is logged).
    """
    import logging

    if not labels:
        return {}

    try:
        result = self.zero_shot(
            text,
            candidate_labels=labels,
            hypothesis_template=hypothesis_template,
            multi_label=True
        )
        return dict(zip(result['labels'], result['scores']))
    except Exception:
        # Best-effort: a failed classification yields "no scores" instead of
        # aborting the run; KeyboardInterrupt/SystemExit still propagate.
        logging.getLogger(__name__).warning(
            "Zero-shot classification failed; returning no scores", exc_info=True
        )
        return {}
227
 
228
def evaluate_study_design(self, text: str) -> Dict:
    """Classify the study design of ``text`` and map it to a quality level.

    The text is scored against a fixed list of design labels via
    ``self.zero_shot_classify``. The highest-scoring label is then looked up
    in ``self.study_designs`` (level -> list of design substrings); the first
    level with a matching substring wins.

    Returns:
        Dict with ``design_scores`` (label -> score), ``primary_design``
        (top label, ``''`` when nothing was scored) and ``quality_level``
        (``'unknown'`` when no level matches).
    """
    candidate_designs = [
        'randomized controlled trial',
        'systematic review',
        'meta-analysis',
        'cohort study',
        'case-control study',
        'cross-sectional study',
        'case report',
        'observational study',
        'experimental study'
    ]

    scores = self.zero_shot_classify(
        text, candidate_designs, hypothesis_template="This is a {}"
    )

    # Top-scoring label, or an empty string when classification gave nothing.
    if scores:
        top_design, _ = max(scores.items(), key=lambda pair: pair[1])
    else:
        top_design = ''

    top_lower = top_design.lower()
    quality = next(
        (
            level
            for level, designs in self.study_designs.items()
            if any(design in top_lower for design in designs)
        ),
        'unknown',
    )

    return {
        'design_scores': scores,
        'primary_design': top_design,
        'quality_level': quality
    }
262
 
263
def evaluate_pain_surgery_relevance(self, text: str) -> Dict:
    """Score ``text`` against fixed pain and surgery vocabularies.

    Two zero-shot classifications are run through ``self.zero_shot_classify``
    (pain labels first, then surgery labels), both with the hypothesis
    template ``"This study involves {}"``.

    Returns:
        Dict with the per-label scores (``pain_terms``, ``surgery_terms``)
        and the best score of each group (``pain_relevance``,
        ``surgery_relevance``; ``0`` when a group produced no scores).
    """
    template = "This study involves {}"

    pain_labels = [
        'chronic pain', 'acute pain', 'postoperative pain',
        'pain management', 'analgesia', 'neuropathic pain',
        'pain relief', 'pain control', 'pain assessment'
    ]
    surgery_labels = [
        'surgical procedure', 'postoperative', 'perioperative',
        'surgical intervention', 'operation', 'surgical outcomes',
        'surgical complications', 'surgical technique'
    ]

    def best_of(label_scores):
        # Highest score in the group, or 0 for an empty result.
        if label_scores:
            return max(label_scores.values())
        return 0

    pain_scores = self.zero_shot_classify(text, pain_labels, hypothesis_template=template)
    surgery_scores = self.zero_shot_classify(text, surgery_labels, hypothesis_template=template)

    return {
        'pain_relevance': best_of(pain_scores),
        'surgery_relevance': best_of(surgery_scores),
        'pain_terms': pain_scores,
        'surgery_terms': surgery_scores
    }
297
 
298
def stage1_advanced_classification(self, title: str, abstract: str, criteria_text: str) -> Dict:
    """Run Stage 1 (title/abstract) screening for a single study.

    Steps: parse the criteria, score the PICO elements and the general
    inclusion/exclusion criteria with the cross-encoder, classify the study
    design, optionally score pain/surgery relevance, then delegate the final
    decision to ``self._make_decision_stage1``.

    Args:
        title: Study title.
        abstract: Study abstract.
        criteria_text: Free-text inclusion/exclusion criteria.

    Returns:
        Dict with ``decision``, ``confidence``, ``reasoning`` and
        ``detailed_scores``; when enough text is available it also carries
        ``study_design`` and ``quality_level``.
    """
    study_text = f"{title} {abstract}"
    if len(study_text.strip()) < 20:
        # Too little text to say anything meaningful.
        return {
            'decision': 'UNCLEAR',
            'confidence': 0,
            'reasoning': 'Insufficient text for analysis',
            'detailed_scores': {}
        }

    criteria = self.parse_advanced_criteria(criteria_text)

    scores = {
        'population': 0,
        'intervention': 0,
        'comparator': 0,
        'outcomes': 0,
        'study_design': 0,
        'inclusion': 0,
        'exclusion': 0,
        'pain_relevance': 0,
        'surgery_relevance': 0
    }
    reasoning_parts = []

    # 1. PICO elements: best cross-encoder score over at most 5 criteria each.
    for element in ('population', 'intervention', 'comparator', 'outcomes'):
        candidates = criteria[element][:5]
        if not candidates:
            continue
        element_scores = [self.cross_encoder_score(study_text, c) for c in candidates]
        best_idx = max(range(len(element_scores)), key=element_scores.__getitem__)
        scores[element] = element_scores[best_idx]
        if scores[element] > 0.5:
            reasoning_parts.append(
                f"{element.capitalize()}: '{candidates[best_idx]}' ({scores[element]:.2f})"
            )

    # 2. Study design.
    design_eval = self.evaluate_study_design(study_text)
    design_scores = design_eval['design_scores']
    scores['study_design'] = max(design_scores.values()) if design_scores else 0
    if scores['study_design'] > 0.5:
        reasoning_parts.append(
            f"Study Design: {design_eval['primary_design']} ({scores['study_design']:.2f})"
        )

    # 3. Pain / surgery relevance. The evaluation covers both topics in one
    # pass, so it is run at most once per study and shared by both checks.
    criteria_lower = criteria_text.lower()
    wants_pain = bool(criteria['pain_related']) or 'pain' in criteria_lower
    # Same surgery keywords the criteria parser recognises.
    wants_surgery = bool(criteria['surgery_related']) or any(
        term in criteria_lower for term in ('surgery', 'surgical', 'operation')
    )
    if wants_pain or wants_surgery:
        relevance = self.evaluate_pain_surgery_relevance(study_text)
        if wants_pain:
            scores['pain_relevance'] = relevance['pain_relevance']
            if scores['pain_relevance'] > 0.5:
                reasoning_parts.append(f"Pain Relevance: {scores['pain_relevance']:.2f}")
        if wants_surgery:
            scores['surgery_relevance'] = relevance['surgery_relevance']
            if scores['surgery_relevance'] > 0.5:
                reasoning_parts.append(f"Surgery Relevance: {scores['surgery_relevance']:.2f}")

    # 4. General inclusion criteria (at most 3).
    inclusion_scores = [
        self.cross_encoder_score(study_text, c) for c in criteria['include_general'][:3]
    ]
    if inclusion_scores:
        scores['inclusion'] = max(inclusion_scores)
        if scores['inclusion'] > 0.5:
            reasoning_parts.append(f"Inclusion Match: {scores['inclusion']:.2f}")

    # 5. General exclusion criteria (at most 3).
    exclusion_scores = [
        self.cross_encoder_score(study_text, c) for c in criteria['exclude_general'][:3]
    ]
    if exclusion_scores:
        scores['exclusion'] = max(exclusion_scores)
        if scores['exclusion'] > 0.6:
            reasoning_parts.append(f"EXCLUSION Match: {scores['exclusion']:.2f}")

    # 6. A low-quality design raises the exclusion score to at least 0.7.
    if design_eval.get('quality_level') == 'low_quality':
        scores['exclusion'] = max(scores['exclusion'], 0.7)
        reasoning_parts.append(f"Low Quality Design: {design_eval['primary_design']}")

    decision, confidence = self._make_decision_stage1(scores, design_eval)

    if not reasoning_parts:
        reasoning_parts.append("No strong matches found")
    reasoning = f"Stage 1 {decision}: {'; '.join(reasoning_parts)}"

    return {
        'decision': decision,
        'confidence': confidence,
        'reasoning': reasoning,
        'detailed_scores': scores,
        'study_design': design_eval.get('primary_design', 'Unknown'),
        'quality_level': design_eval.get('quality_level', 'Unknown')
    }
403
+
404
+ def _make_decision_stage1(self, scores: Dict, design_eval: Dict) -> Tuple[str, int]:
405
+ """Make Stage 1 decision based on scores with calibrated confidence"""
406
+
407
+ # Strong exclusion criteria
408
+ if scores['exclusion'] > 0.65:
409
+ confidence = min(int(scores['exclusion'] * 100), 90)
410
+ return 'EXCLUDE', confidence
411
+
412
+ # Low quality design exclusion
413
+ if design_eval.get('quality_level') == 'low_quality' and scores['study_design'] > 0.7:
414
+ return 'EXCLUDE', 75
415
+
416
+ # Calculate inclusion strength
417
+ picos_scores = [scores['population'], scores['intervention'], scores['outcomes']]
418
+ relevant_picos = sum(1 for s in picos_scores if s > 0.5)
419
+ avg_picos = np.mean([s for s in picos_scores if s > 0.3]) if any(s > 0.3 for s in picos_scores) else 0
420
+
421
+ # Strong inclusion - multiple PICOS matches
422
+ if relevant_picos >= 2 and avg_picos > 0.6:
423
+ confidence = min(int(avg_picos * 85), 85)
424
+ return 'INCLUDE', confidence
425
+
426
+ # Moderate inclusion - some relevant matches
427
+ if relevant_picos >= 1 or scores['inclusion'] > 0.6:
428
+ best_score = max(scores['population'], scores['intervention'], scores['outcomes'], scores['inclusion'])
429
+ confidence = min(int(best_score * 75), 75)
430
+ return 'INCLUDE', confidence
431
+
432
+ # Special consideration for pain/surgery studies
433
+ if (scores['pain_relevance'] > 0.6 or scores['surgery_relevance'] > 0.6) and \
434
+ design_eval.get('quality_level') in ['high_quality', 'moderate_quality']:
435
+ confidence = 70
436
+ return 'INCLUDE', confidence
437
+
438
+ # Weak matches - need manual review
439
+ if any(s > 0.4 for s in [scores['population'], scores['intervention'], scores['outcomes']]):
440
+ return 'UNCLEAR', 50
441
+
442
+ # No relevant matches
443
+ return 'EXCLUDE', 60
444
+
445
+
446
+ # ============================================================================
447
+ # GRADIO INTERFACE FUNCTIONS
448
+ # ============================================================================
449
+
450
# Shared screener instance; stays None until initialize_screener() builds it.
screener = None


def initialize_screener():
    """Return the shared screener, constructing it on the first call only."""
    global screener
    if screener is not None:
        return screener
    screener = AdvancedMedicalScreener()
    return screener
459
+
460
def process_stage1_advanced(file, title_col, abstract_col, criteria, sample_size):
    """Screen the studies of an uploaded CSV and summarise the results.

    Inputs are validated before the screener is initialised, so a missing
    file or a wrong column name is reported immediately with a clear message.

    Args:
        file: Uploaded file; either an object with a ``name`` path attribute
            or a path string. ``None`` when nothing was uploaded.
        title_col: Name of the CSV column holding study titles.
        abstract_col: Name of the CSV column holding abstracts.
        criteria: Free-text inclusion/exclusion criteria.
        sample_size: Maximum number of rows to process (int or float).

    Returns:
        ``(summary_markdown, results_dataframe, csv_text)`` on success, or
        ``("Error: ...", None, "")`` on any failure.
    """
    if file is None:
        return "Error: please upload a CSV file first.", None, ""

    try:
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        missing = [col for col in (title_col, abstract_col) if col not in df.columns]
        if missing:
            return f"Error: column(s) not found in CSV: {missing}", None, ""

        # A slider may deliver a float; DataFrame.head needs an integer.
        limit = int(sample_size)
        if limit < len(df):
            df = df.head(limit)

        model = initialize_screener()

        results = []
        for idx, row in df.iterrows():
            title = str(row[title_col]) if pd.notna(row[title_col]) else ""
            abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""

            # Rows without any text cannot be screened.
            if not title and not abstract:
                continue

            classification = model.stage1_advanced_classification(title, abstract, criteria)

            results.append({
                'Study_ID': idx + 1,
                'Title': title[:100] + "..." if len(title) > 100 else title,
                'Stage1_Decision': classification['decision'],
                'Stage1_Confidence': f"{classification['confidence']}%",
                'Study_Design': classification.get('study_design', 'Unknown'),
                'Quality_Level': classification.get('quality_level', 'Unknown'),
                'Stage1_Reasoning': classification['reasoning'],
                'Ready_for_Stage2': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
                'Full_Title': title,
                'Full_Abstract': abstract
            })

        if not results:
            # Avoids a KeyError on the empty frame and a division by zero below.
            return "Error: no rows with a title or abstract were found.", None, ""

        results_df = pd.DataFrame(results)

        total = len(results_df)
        decisions = results_df['Stage1_Decision']
        included = int((decisions == 'INCLUDE').sum())
        excluded = int((decisions == 'EXCLUDE').sum())
        unclear = int((decisions == 'UNCLEAR').sum())

        quality_counts = results_df['Quality_Level'].value_counts().to_dict()
        quality_summary = "\n".join(
            f" - {level}: {count}" for level, count in quality_counts.items()
        )

        summary = "\n".join([
            "",
            "## 📊 Advanced Stage 1 Results (AI-Powered Medical Screening)",
            "",
            "**Screening Complete with Advanced NLP Models:**",
            f"- **Total Studies Analyzed:** {total}",
            f"- **✅ Include for Stage 2:** {included} ({included/total*100:.1f}%)",
            f"- **❌ Exclude:** {excluded} ({excluded/total*100:.1f}%)",
            f"- **⚠️ Needs Manual Review:** {unclear} ({unclear/total*100:.1f}%)",
            "",
            "**Study Quality Distribution:**",
            quality_summary,
            "",
            "**Models Used:**",
            "- PubMedBERT for medical text understanding",
            "- Cross-encoder for semantic similarity",
            "- Zero-shot classification for criteria matching",
            "- Medical NER for entity extraction",
            "",
            "**Next Steps:**",
            f"1. Review {unclear} studies marked as UNCLEAR",
            f"2. Proceed to Stage 2 with {included} included studies",
            "3. Consider manual validation of borderline cases",
            "",
        ])

        return summary, results_df, results_df.to_csv(index=False)

    except Exception as e:
        # UI boundary: report the failure in the summary pane instead of crashing.
        return f"Error: {str(e)}", None, ""
536
 
537
+ def create_advanced_interface():
538
+ """Create the Gradio interface with advanced NLP capabilities"""
539
+ with gr.Blocks(title="πŸ”¬ Advanced Medical Literature Screening", theme=gr.themes.Soft()) as interface:
540
 
541
  gr.Markdown("""
542
+ # πŸ”¬ Advanced Medical Literature Screening with AI
543
 
544
+ **State-of-the-art NLP models for systematic review screening**
545
 
546
+ This tool uses advanced transformer models specifically trained on medical literature:
547
+ - **PubMedBERT**: Understands medical terminology and concepts
548
+ - **Cross-Encoders**: Accurate semantic matching for criteria
549
+ - **Zero-Shot Classification**: Flexible criteria evaluation
550
+ - **Medical NER**: Extracts medical entities automatically
551
+
552
+ Optimized for **pain**, **surgery**, and **study design** criteria, with general medical understanding.
553
  """)
554
 
555
  with gr.Tabs():
556
 
557
  # STAGE 1 TAB
558
+ with gr.TabItem("πŸ“‹ Stage 1: Advanced Title/Abstract Screening"):
559
  with gr.Row():
560
  with gr.Column(scale=1):
561
  gr.Markdown("### πŸ“ Upload Study Data")
 
570
  stage1_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
571
  stage1_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)
572
 
573
+ stage1_sample = gr.Slider(
574
+ label="Studies to Process",
575
+ minimum=5,
576
+ maximum=500,
577
+ value=100,
578
+ step=5,
579
+ info="Processing time increases with more studies"
580
+ )
581
 
582
  with gr.Column(scale=1):
583
+ gr.Markdown("### 🎯 Inclusion/Exclusion Criteria")
584
 
585
  stage1_criteria = gr.Textbox(
586
+ label="Enter your criteria (understands medical terminology)",
587
  value="""POPULATION:
588
+ - Adult patients
589
+ - Chronic pain patients
590
+ - Surgical patients
591
 
592
  INTERVENTION:
593
+ - Pain management interventions
594
+ - Surgical procedures
595
+ - Analgesic treatments
596
 
597
  OUTCOMES:
598
+ - Pain intensity
599
+ - Pain relief
600
+ - Functional outcomes
601
+ - Quality of life
602
 
603
  STUDY DESIGN:
604
  - Randomized controlled trials
605
+ - Systematic reviews
606
  - Cohort studies
607
+ - NOT case reports
608
 
609
  EXCLUDE:
610
  - Animal studies
611
+ - Pediatric only
612
  - Case reports
613
+ - Editorials""",
614
+ lines=20,
615
+ info="The AI understands medical synonyms and related terms"
616
  )
617
 
618
+ with gr.Row():
619
+ stage1_process_btn = gr.Button(
620
+ "πŸš€ Start Advanced AI Screening",
621
+ variant="primary",
622
+ scale=2
623
+ )
624
+ gr.Markdown("*First run may take longer to load models*", scale=1)
625
 
626
  stage1_results = gr.Markdown()
627
+ stage1_table = gr.Dataframe(
628
+ label="Stage 1 Results with Quality Assessment",
629
+ wrap=True
630
+ )
631
  stage1_download_data = gr.Textbox(visible=False)
632
+ stage1_download_btn = gr.DownloadButton(
633
+ label="πŸ’Ύ Download Stage 1 Results",
634
+ visible=False
635
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
 
637
+ # HELP TAB
638
+ with gr.TabItem("❓ Help & Guidelines"):
639
  gr.Markdown("""
640
+ ## πŸ€– Advanced Features Explained
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
641
 
642
+ ### **Medical Understanding**
643
+ The system automatically:
644
+ - Recognizes medical synonyms (e.g., RCT = randomized controlled trial)
645
+ - Understands pain-related terms (nociception, analgesia, hyperalgesia)
646
+ - Identifies surgical concepts (perioperative, postoperative, resection)
647
+ - Evaluates study quality based on design
648
 
649
+ ### **How to Write Effective Criteria**
 
 
 
 
650
 
651
+ 1. **Be specific but comprehensive:**
652
+ - βœ… "chronic pain lasting > 3 months"
653
+ - βœ… "postoperative pain management"
654
+ - ❌ "pain" (too vague)
655
 
656
+ 2. **Use medical terms freely:**
657
+ - The AI understands medical terminology
658
+ - It will automatically expand terms with synonyms
659
+ - Example: "surgery" β†’ surgical, operation, resection, etc.
660
 
661
+ 3. **Specify study designs clearly:**
662
+ - High quality: RCT, systematic review, meta-analysis
663
+ - Moderate: cohort, case-control
664
+ - Low: case reports, opinions
665
 
666
+ ### **Confidence Scores**
667
+ - **80-100%**: Strong match, high confidence
668
+ - **60-79%**: Good match, moderate confidence
669
+ - **40-59%**: Weak match, needs review
670
+ - **0-39%**: Poor match, likely exclude
671
 
672
+ ### **Tips for Best Results**
673
+ - Include both inclusion AND exclusion criteria
674
+ - Specify population, intervention, and outcomes
675
+ - Mention specific study designs to include/exclude
676
+ - The AI works best with complete abstracts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
  """)
678
 
679
+ # Event handlers
680
  def update_stage1_columns(file):
681
  if file is None:
682
  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
 
689
  except:
690
  return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
691
 
692
+ stage1_file.change(
693
+ fn=update_stage1_columns,
694
+ inputs=[stage1_file],
695
+ outputs=[stage1_title_col, stage1_abstract_col]
696
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
 
698
+ def process_with_download(*args):
699
+ summary, table, csv_data = process_stage1_advanced(*args)
700
  return summary, table, csv_data, gr.DownloadButton(visible=bool(csv_data))
701
 
702
  stage1_process_btn.click(
703
+ fn=process_with_download,
704
  inputs=[stage1_file, stage1_title_col, stage1_abstract_col, stage1_criteria, stage1_sample],
705
  outputs=[stage1_results, stage1_table, stage1_download_data, stage1_download_btn]
706
  )
707
 
708
+ stage1_download_btn.click(
709
+ lambda data: data,
710
+ inputs=[stage1_download_data],
711
+ outputs=[gr.File()]
712
  )
 
 
 
713
 
714
  return interface
715
 
716
if __name__ == "__main__":
    # Script entry point: announce start-up, build the UI, then serve it.
    print("Starting Advanced Medical Literature Screening System...")
    interface = create_advanced_interface()
    interface.launch()
requirements.txt CHANGED
@@ -1,6 +1,21 @@
1
- gradio
2
- pandas
3
- transformers
4
- torch
5
- requests
6
- numpy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ gradio>=3.40.0
3
+ pandas>=1.3.0
4
+ numpy>=1.21.0
5
+ requests>=2.28.0
6
+
7
+ # Deep Learning frameworks
8
+ torch>=1.9.0
9
+ transformers>=4.30.0
10
+
11
+ # Advanced NLP models - REQUIRED for improved app
12
+ sentence-transformers>=2.2.0
13
+
14
+ # Optional but recommended for better performance
15
+ accelerate>=0.20.0
16
+ scipy>=1.7.0
17
+ scikit-learn>=1.0.0
18
+
19
+ # For data processing and utilities
20
+ tqdm>=4.65.0
21
+ tokenizers>=0.13.0