AssanaliAidarkhan committed
Commit 9e90db6 · verified · 1 Parent(s): 750248d

Update app.py

Files changed (1)
  1. app.py +194 -91
app.py CHANGED
@@ -2,13 +2,17 @@ import gradio as gr
  import torch
  import torch.nn as nn
  from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPModel, CLIPProcessor
  from huggingface_hub import hf_hub_download
  from PIL import Image
  import torch.nn.functional as F
  import json

  # Model repositories
  BIOMEDCLIP_REPO = "AssanaliAidarkhan/Biomedclip"

  # Global variables
  biomedclip_model = None
@@ -16,6 +20,9 @@ biomedclip_processor = None
  biomedclip_id2label = {}
  qwen_model = None
  qwen_tokenizer = None

  class CLIPClassifier(nn.Module):
      def __init__(self, clip_model, num_classes):
@@ -53,140 +60,214 @@ def load_biomedclip():
          print(f"❌ BiomedCLIP error: {e}")
          return False

- def load_qwen_simple():
-     """Load Qwen with minimal setup"""
-     global qwen_model, qwen_tokenizer

      try:
-         print("🔄 Loading Qwen (simple)...")

-         # Load Qwen directly
-         qwen_tokenizer = AutoTokenizer.from_pretrained(
-             "Qwen/Qwen1.5-0.5B-Chat",
-             trust_remote_code=True
-         )

          qwen_model = AutoModelForCausalLM.from_pretrained(
              "Qwen/Qwen1.5-0.5B-Chat",
              torch_dtype=torch.float32,
              trust_remote_code=True
          )

-         print("✅ Qwen loaded!")
          return True

      except Exception as e:
-         print(f"❌ Qwen error: {e}")
          return False

- def classify_mri(image):
-     """Classify MRI (working code)"""
-     if biomedclip_model is None or image is None:
-         return None

      try:
-         if image.mode != 'RGB':
-             image = image.convert('RGB')
-
-         inputs = biomedclip_processor(images=image, return_tensors="pt")

-         with torch.no_grad():
-             outputs = biomedclip_model(**inputs)
-             logits = outputs['logits']
-             probabilities = F.softmax(logits, dim=1)

-         top_prob, top_idx = torch.max(probabilities, 1)
-         class_idx = top_idx.item()

-         if class_idx in biomedclip_id2label:
-             class_name = biomedclip_id2label[class_idx]
-         elif str(class_idx) in biomedclip_id2label:
-             class_name = biomedclip_id2label[str(class_idx)]
-         else:
-             class_name = f"Class_{class_idx}"

-         confidence = top_prob.item() * 100

-         return class_name, confidence

      except Exception as e:
-         print(f"Classification error: {e}")
-         return None, None

- def generate_simple_advice(class_name, confidence):
-     """Generate advice using Qwen (simple approach)"""
      global qwen_model, qwen_tokenizer

      if qwen_model is None:
          return "❌ Qwen model not loaded"

      try:
-         print(f"🔄 Generating advice for: {class_name}")

-         # Simple medical knowledge lookup
-         advice_map = {
-             "partial_acl_injury": "Partial ACL injury detected. Recommendations: Rest and avoid pivoting activities. Apply ice for 15-20 minutes several times daily. Consider physical therapy consultation. Follow-up MRI in 6-8 weeks to monitor healing.",
-             "complete_acl_tear": "Complete ACL tear detected. Urgent orthopedic consultation required. Likely surgical reconstruction needed. Immediate immobilization and avoid weight-bearing activities.",
-             "acl_sprain": "ACL sprain detected. Conservative treatment with RICE protocol (Rest, Ice, Compression, Elevation). Physical therapy for strengthening. Gradual return to activities.",
-             "normal": "ACL appears normal. Continue regular activities. If symptoms persist, consider clinical examination for other causes."
-         }
-
-         # Get base advice
-         base_advice = advice_map.get(class_name.lower(), "Consult medical professional for evaluation.")

-         # Create simple prompt for Qwen
-         simple_prompt = f"Medical diagnosis: {class_name} with {confidence:.1f}% confidence. Provide brief clinical advice:"

          # Tokenize
-         inputs = qwen_tokenizer(simple_prompt, return_tensors="pt")

          # Generate
          with torch.no_grad():
              outputs = qwen_model.generate(
                  inputs.input_ids,
-                 max_new_tokens=100,
-                 temperature=0.8,
                  do_sample=True,
-                 pad_token_id=qwen_tokenizer.eos_token_id
              )

-         # Decode
-         full_output = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)

-         # Extract just the generated part
-         if simple_prompt in full_output:
-             generated_advice = full_output.replace(simple_prompt, "").strip()
-         else:
-             generated_advice = full_output

-         # Combine base advice with Qwen advice
-         if generated_advice and len(generated_advice) > 10:
-             combined_advice = f"**Clinical Guidelines:** {base_advice}\n\n**AI Analysis:** {generated_advice}"
-         else:
-             combined_advice = base_advice

-         print(f"✅ Generated advice: {generated_advice[:50]}...")
-         return combined_advice

      except Exception as e:
-         print(f"❌ Advice generation error: {e}")
-         # Fallback to basic advice
-         return advice_map.get(class_name.lower(), "Consult medical professional for evaluation.")

- def complete_pipeline(image):
-     """Complete analysis pipeline"""

      if image is None:
          return "❌ Please upload an MRI scan", ""

-     # Step 1: Classification
-     class_name, confidence = classify_mri(image)

-     if class_name is None:
-         return "❌ Classification failed", ""

-     # Step 2: Medical advice
-     medical_advice = generate_simple_advice(class_name, confidence)

      # Format outputs
      classification_text = f"""
@@ -200,43 +281,65 @@ def complete_pipeline(image):
      """

      advice_text = f"""
- # 🏥 **Medical Recommendations**

- {medical_advice}

  ---
- ⚠️ **Disclaimer:** For educational purposes only. Consult medical professionals.
      """

      return classification_text, advice_text

  # Load models
  biomedclip_loaded = load_biomedclip()
- qwen_loaded = load_qwen_simple()

  # Create interface
- with gr.Blocks(title="Medical AI Pipeline") as app:

-     gr.Markdown("# 🏥 Medical AI Analysis Pipeline")
-     gr.Markdown("**BiomedCLIP** (Classification) + **Qwen** (Medical Advice)")

-     status = f"Status: BiomedCLIP {'✅' if biomedclip_loaded else '❌'} | Qwen {'✅' if qwen_loaded else '❌'}"
-     gr.Markdown(f"**{status}**")

      with gr.Row():
          with gr.Column():
              image_input = gr.Image(type="pil", label="📸 Upload MRI Scan")
-             analyze_btn = gr.Button("🔬 Complete Analysis", variant="primary")

          with gr.Column():
              classification_output = gr.Markdown(label="🔬 Classification")
-             advice_output = gr.Markdown(label="🏥 Medical Advice")

      analyze_btn.click(
-         fn=complete_pipeline,
          inputs=image_input,
          outputs=[classification_output, advice_output]
      )

  if __name__ == "__main__":
      app.launch()

  import torch
  import torch.nn as nn
  from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPModel, CLIPProcessor
+ from sentence_transformers import SentenceTransformer
  from huggingface_hub import hf_hub_download
  from PIL import Image
  import torch.nn.functional as F
  import json
+ import numpy as np
+ import faiss

  # Model repositories
  BIOMEDCLIP_REPO = "AssanaliAidarkhan/Biomedclip"
+ QWEN_RAG_REPO = "AssanaliAidarkhan/qwen-medical-rag"

  # Global variables
  biomedclip_model = None

  biomedclip_id2label = {}
  qwen_model = None
  qwen_tokenizer = None
+ embedding_model = None
+ medical_knowledge = []
+ faiss_index = None

  class CLIPClassifier(nn.Module):
      def __init__(self, clip_model, num_classes):
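The added imports above pull in sentence_transformers, numpy, and faiss, so the Space also needs those packages available at runtime. The commit does not touch requirements.txt, so the exact dependency file is an assumption; a plausible set of entries (faiss-cpu being the usual CPU-only FAISS build on PyPI, versions left unpinned) would be:

    # requirements.txt (illustrative sketch, not part of this commit)
    gradio
    torch
    transformers
    huggingface_hub
    Pillow
    sentence-transformers
    faiss-cpu
    numpy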
 
          print(f"❌ BiomedCLIP error: {e}")
          return False

+ def load_rag_system():
+     """Load complete RAG system"""
+     global qwen_model, qwen_tokenizer, embedding_model, medical_knowledge, faiss_index

      try:
+         print("🔄 Loading RAG system...")

+         # 1. Load medical knowledge base
+         try:
+             knowledge_path = hf_hub_download(repo_id=QWEN_RAG_REPO, filename="medical_knowledge.json")
+             with open(knowledge_path, 'r', encoding='utf-8') as f:
+                 medical_knowledge = json.load(f)
+             print(f"✅ Knowledge base: {len(medical_knowledge)} documents")
+         except Exception as e:
+             print(f"⚠️ Knowledge loading error: {e}, using fallback")
+             # Fallback knowledge base
+             medical_knowledge = [
+                 {
+                     "id": "doc1",
+                     "title": "Partial injury of the anterior cruciate ligament",
+                     "content": "Signs of a partial anterior cruciate ligament injury: thickening, increased T2 signal, partial disorganization of the fibers, the ligament can still be traced along its course",
+                     "category": "Partial ACL injury",
+                     "advice": "Recommend conservative treatment, physical therapy, follow-up MRI in 6-8 weeks"
+                 },
+                 {
+                     "id": "doc2",
+                     "title": "Complete tear of the anterior cruciate ligament",
+                     "content": "Signs of a complete anterior cruciate ligament tear: the fibers cannot be traced along their course, a zone of increased signal is seen in the projection of the ligament, hemarthrosis",
+                     "category": "Complete ACL tear",
+                     "advice": "Urgent orthopedic consultation, likely requires ACL reconstruction surgery"
+                 }
+             ]
+
+         # 2. Load embedding model
+         print("🔄 Loading embeddings...")
+         embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+         # 3. Create embeddings and FAISS index
+         print("🔄 Creating FAISS index...")
+         text_contents = []
+         for doc in medical_knowledge:
+             text = f"{doc.get('title', '')} {doc.get('content', '')} {doc.get('advice', '')}"
+             text_contents.append(text)
+
+         embeddings = embedding_model.encode(text_contents, convert_to_numpy=True)
+
+         # Create FAISS index
+         dimension = embeddings.shape[1]
+         faiss_index = faiss.IndexFlatIP(dimension)
+         faiss.normalize_L2(embeddings)
+         faiss_index.add(embeddings)
+
+         print(f"✅ FAISS index created with {faiss_index.ntotal} documents")

+         # 4. Load Qwen
+         print("🔄 Loading Qwen...")
+         qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat", trust_remote_code=True)
          qwen_model = AutoModelForCausalLM.from_pretrained(
              "Qwen/Qwen1.5-0.5B-Chat",
              torch_dtype=torch.float32,
              trust_remote_code=True
          )

+         print("✅ RAG system loaded completely!")
          return True

      except Exception as e:
+         print(f"❌ RAG loading error: {e}")
+         import traceback
+         print(traceback.format_exc())
          return False

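For context, load_rag_system expects medical_knowledge.json in the AssanaliAidarkhan/qwen-medical-rag repo to be a JSON list of documents with the same fields as the fallback entries above. A minimal illustrative entry (the values are placeholders loosely echoing the removed advice_map, not taken from the real file):

    [
      {
        "id": "doc3",
        "title": "ACL sprain",
        "content": "MRI signs of an ACL sprain: fibers intact with mild signal increase",
        "category": "ACL sprain",
        "advice": "Conservative treatment, RICE protocol, physical therapy"
      }
    ]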
+ def retrieve_relevant_knowledge(classification_result):
+     """Retrieve relevant medical documents"""
+     global embedding_model, medical_knowledge, faiss_index
+
+     if faiss_index is None:
+         return [], "No knowledge base available"

      try:
+         # Create query for retrieval
+         query = f"Medical diagnosis {classification_result} treatment recommendations clinical advice"

+         # Get query embedding
+         query_embedding = embedding_model.encode([query], convert_to_numpy=True)
+         faiss.normalize_L2(query_embedding)

+         # Search FAISS index
+         scores, indices = faiss_index.search(query_embedding, 2)  # Top 2 documents

+         # Get relevant documents
+         retrieved_docs = []
+         context_text = ""

+         for score, idx in zip(scores[0], indices[0]):
+             if idx != -1 and idx < len(medical_knowledge):
+                 doc = medical_knowledge[idx]
+                 retrieved_docs.append((doc, float(score)))
+
+                 context_text += f"Medical Knowledge: {doc.get('content', '')}\n"
+                 context_text += f"Clinical Advice: {doc.get('advice', '')}\n"
+                 context_text += f"Category: {doc.get('category', '')}\n\n"

+         return retrieved_docs, context_text

      except Exception as e:
+         print(f"❌ Retrieval error: {e}")
+         return [], f"Retrieval error: {e}"

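The retrieval step above depends on one detail worth calling out: L2-normalized embeddings searched with an inner-product index (IndexFlatIP) give cosine similarity. A self-contained sketch of the same pattern outside the app (document texts here are made up for illustration):

    import faiss
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-MiniLM-L6-v2")
    docs = ["partial ACL injury, conservative treatment",
            "complete ACL tear, surgical reconstruction"]

    emb = model.encode(docs, convert_to_numpy=True)   # float32 array, shape (2, 384)
    faiss.normalize_L2(emb)                           # normalize so inner product == cosine
    index = faiss.IndexFlatIP(emb.shape[1])
    index.add(emb)

    query = model.encode(["Medical diagnosis complete_acl_tear treatment"], convert_to_numpy=True)
    faiss.normalize_L2(query)
    scores, ids = index.search(query, 2)              # top-2 cosine scores and document indices
    print(ids[0], scores[0])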
+ def generate_qwen_advice(classification_result, retrieved_context):
+     """Generate medical advice using Qwen with RAG context"""
      global qwen_model, qwen_tokenizer

      if qwen_model is None:
          return "❌ Qwen model not loaded"

      try:
+         print("🔄 Generating Qwen advice...")

+         # Create comprehensive prompt
+         prompt = f"""You are a medical AI assistant. Based on the MRI classification and medical knowledge provided, give clinical recommendations.
+
+ MRI Classification: {classification_result}
+
+ Retrieved Medical Knowledge:
+ {retrieved_context}
+
+ Provide specific clinical recommendations including treatment options and follow-up care:"""

+         print(f"📝 Prompt length: {len(prompt)} characters")

          # Tokenize
+         inputs = qwen_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
+
+         print(f"🔧 Input tokens: {inputs.input_ids.shape}")

          # Generate
          with torch.no_grad():
              outputs = qwen_model.generate(
                  inputs.input_ids,
+                 max_new_tokens=120,
+                 temperature=0.7,
                  do_sample=True,
+                 pad_token_id=qwen_tokenizer.eos_token_id,
+                 eos_token_id=qwen_tokenizer.eos_token_id
              )

+         # Decode only the new tokens
+         generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
+         generated_text = qwen_tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

+         print(f"✅ Generated: {generated_text[:100]}...")

+         if len(generated_text) < 10:
+             return "No specific recommendations generated. Consult medical professional."

+         return generated_text

      except Exception as e:
+         print(f"❌ Qwen generation error: {e}")
+         import traceback
+         print(traceback.format_exc())
+         return f"Generation error: {e}"

+ def complete_analysis(image):
+     """Complete pipeline with RAG"""

      if image is None:
          return "❌ Please upload an MRI scan", ""

+     # Step 1: Classification
+     try:
+         if biomedclip_model is None:
+             return "❌ BiomedCLIP not loaded", ""
+
+         if image.mode != 'RGB':
+             image = image.convert('RGB')
+
+         inputs = biomedclip_processor(images=image, return_tensors="pt")
+
+         with torch.no_grad():
+             outputs = biomedclip_model(**inputs)
+             logits = outputs['logits']
+             probabilities = F.softmax(logits, dim=1)
+
+         top_prob, top_idx = torch.max(probabilities, 1)
+         class_idx = top_idx.item()
+
+         if class_idx in biomedclip_id2label:
+             class_name = biomedclip_id2label[class_idx]
+         elif str(class_idx) in biomedclip_id2label:
+             class_name = biomedclip_id2label[str(class_idx)]
+         else:
+             class_name = f"Class_{class_idx}"
+
+         confidence = top_prob.item() * 100
+         classification_result = f"{class_name} ({confidence:.1f}% confidence)"
+
+         print(f"✅ Classification: {classification_result}")
+
+     except Exception as e:
+         return f"❌ Classification error: {e}", ""

+     # Step 2: RAG retrieval
+     retrieved_docs, context = retrieve_relevant_knowledge(classification_result)

+     # Step 3: Qwen generation
+     qwen_advice = generate_qwen_advice(classification_result, context)

      # Format outputs
      classification_text = f"""

      """

      advice_text = f"""
+ # 🏥 **AI-Generated Medical Recommendations**
+
+ ## 🤖 **Qwen Analysis:**
+ {qwen_advice}

+ ## 📚 **Retrieved Medical Knowledge:**
+ {context if context else "No relevant knowledge retrieved"}
+
+ ## 📋 **Retrieved Documents:**
+ {len(retrieved_docs)} documents found and used for advice generation

  ---
+ ⚠️ **Disclaimer:** For educational purposes only. Always consult medical professionals.
      """

      return classification_text, advice_text

  # Load models
+ print("🚀 Loading complete pipeline...")
  biomedclip_loaded = load_biomedclip()
+ rag_loaded = load_rag_system()

  # Create interface
+ with gr.Blocks(title="Medical RAG Pipeline") as app:

+     gr.Markdown("# 🏥 Medical AI RAG Pipeline")
+     gr.Markdown("**BiomedCLIP** → **RAG Retrieval** → **Qwen Generation**")

+     status = f"BiomedCLIP: {'✅' if biomedclip_loaded else '❌'} | RAG: {'✅' if rag_loaded else '❌'}"
+     gr.Markdown(f"**Status:** {status}")

      with gr.Row():
          with gr.Column():
              image_input = gr.Image(type="pil", label="📸 Upload MRI Scan")
+             analyze_btn = gr.Button("🔬 Complete RAG Analysis", variant="primary")

          with gr.Column():
              classification_output = gr.Markdown(label="🔬 Classification")
+             advice_output = gr.Markdown(label="🏥 RAG-Generated Advice")

      analyze_btn.click(
+         fn=complete_analysis,
          inputs=image_input,
          outputs=[classification_output, advice_output]
      )
+
+     gr.Markdown("""
+     ### 🔄 **RAG Pipeline Process:**
+     1. **Image Classification** - BiomedCLIP analyzes MRI
+     2. **Knowledge Retrieval** - Find relevant medical documents
+     3. **Context Generation** - Qwen uses retrieved knowledge
+     4. **Advice Output** - AI-generated clinical recommendations
+
+     ### 📚 **Knowledge Base:**
+     - ACL injury types and symptoms
+     - Treatment recommendations
+     - Clinical guidelines
+     - Follow-up protocols
+     """)

  if __name__ == "__main__":
      app.launch()
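Because complete_analysis takes a PIL image and returns the two Markdown strings wired into the interface, the new pipeline can also be exercised without launching the UI. A hypothetical smoke test, placed in the same module below the load calls (sample_mri.png is an assumed local file, and both loaders must have returned True):

    from PIL import Image

    if biomedclip_loaded and rag_loaded:
        classification_md, advice_md = complete_analysis(Image.open("sample_mri.png"))
        print(classification_md)
        print(advice_md)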