Spaces:

AD2000X
/

Ontology-RAG-Demo

Running

App Files Files Community

AD2000X committed on Apr 2

Commit

706c0a5

verified ·

1 Parent(s): 7c71289

Update src/visualization.py

Browse files

Files changed (1) hide show

src/visualization.py +123 -57

src/visualization.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import streamlit as st
 import json
 import networkx as nx
@@ -10,6 +12,123 @@ import matplotlib.colors as mcolors
 from collections import defaultdict
 import math
 def display_reasoning_trace(query: str, retrieved_docs: List[Dict], answer: str, ontology_manager):
     """Display an enhanced trace of how ontological reasoning was used to answer the query."""
     st.subheader("🧠 Ontology-Enhanced Reasoning")
@@ -176,60 +295,7 @@ def display_reasoning_trace(query: str, retrieved_docs: List[Dict], answer: str,
             st.markdown(f"**{adv['title']}**")
             st.markdown(adv["description"])
def analyze_query_ontology_concepts(query: str, ontology_manager) -> Tuple[List[Dict], List[Dict]]:
    """
    Analyze the query to identify ontology concepts with confidence scores.

    This is a simplified implementation that would be replaced with NLP in production.

    Args:
        query: Free-text user query; matching is case-insensitive.
        ontology_manager: Object exposing ``get_classes()`` and an
            ``ontology_data`` mapping with a ``"classes"`` entry and an
            optional ``"relationships"`` list.

    Returns:
        A ``(entity_mentions, relationship_mentions)`` tuple. Each entity dict
        has ``type``, ``confidence`` and ``description``; each relationship
        dict has ``name``, ``domain``, ``range``, ``confidence`` and
        ``description``.
    """
    # Imported once at function scope. The import used to live inside the
    # class loop, which left ``re`` unbound (UnboundLocalError) in the
    # relationship loop whenever the ontology had no classes.
    import re

    query_lower = query.lower()

    # Entity detection
    entity_mentions = []
    classes = ontology_manager.get_classes()

    for class_name in classes:
        # Use word boundary regex to improve matching
        pattern = r'\b' + re.escape(class_name.lower()) + r'\b'
        if re.search(pattern, query_lower):
            # Get class info
            class_info = ontology_manager.ontology_data["classes"].get(class_name, {})

            # Heuristic confidence: longer names score higher, capped at 0.95
            # (this would come from an ML model in production).
            confidence = min(0.95, 0.5 + (len(class_name) / 20))

            entity_mentions.append({
                "type": class_name,
                "confidence": confidence,
                "description": class_info.get("description", "")
            })

    # Relationship detection uses the same whole-word matching
    relationship_mentions = []
    relationships = ontology_manager.ontology_data.get("relationships", [])

    for rel in relationships:
        rel_name = rel["name"]

        # Use word boundary regex
        pattern = r'\b' + re.escape(rel_name.lower()) + r'\b'
        if re.search(pattern, query_lower):
            # Same length heuristic, with a slightly lower cap of 0.9
            confidence = min(0.9, 0.5 + (len(rel_name) / 20))

            relationship_mentions.append({
                "name": rel_name,
                "domain": rel["domain"],
                "range": rel["range"],
                "confidence": confidence,
                "description": rel.get("description", "")
            })

    return entity_mentions, relationship_mentions
 def display_retrieval_flow(query: str, docs_by_source: Dict[str, List]):
     """Create a Sankey diagram showing the flow from query to sources to answer."""
     # Define node labels
@@ -266,7 +332,7 @@ def display_retrieval_flow(query: str, docs_by_source: Dict[str, List]):
             line=dict(color="black", width=0.5),
             label=nodes,
             color=["#1f77b4"] + [px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)]
-        for i in range(len(docs_by_source))] + ["#2ca02c"]
         ),
         link=dict(
             source=source_indices,
@@ -300,7 +366,7 @@ def generate_reasoning_steps(query: str, entity_mentions: List[Dict], relationsh
     if retrieved_docs:
         doc_count = len(retrieved_docs)
         ontology_count = sum(1 for doc in retrieved_docs if hasattr(doc, 'metadata') and
-        doc.metadata.get('source', '') in ['ontology', 'ontology_context'])
         steps.append({
             "title": "Knowledge Retrieval",
@@ -378,7 +444,7 @@ def explain_ontology_advantages(entity_mentions: List[Dict], relationship_mentio
         development or how customer feedback influences business strategy."""
     })
-    return advantages# src/visualization.py
 def render_html_in_streamlit(html_content: str):
     """Display HTML content in Streamlit using components.html."""

+# Add this at the top of visualization.py with other imports
+import re
 import streamlit as st
 import json
 import networkx as nx
 from collections import defaultdict
 import math
def _score_mention(name: str, query_lower: str, word_base: float, partial_base: float, cap: float):
    """Return ``(confidence, match_type)`` for *name* in *query_lower*, or ``None`` if absent.

    ``match_type`` is ``"word"`` when the lower-cased name occurs delimited by
    regex word boundaries and ``"partial"`` when it only occurs as a substring.
    Confidence is the matching base value plus a small length bonus
    (``len(name) / 400``, at most 0.05), capped at *cap*.
    """
    needle = name.lower()
    # An empty name is a substring of every query; never report it as a match.
    if not needle or needle not in query_lower:
        return None

    # A whole-word hit always implies a substring hit, so the regex only has
    # to decide which of the two confidence tiers applies.
    is_word = re.search(r'\b' + re.escape(needle) + r'\b', query_lower) is not None
    base = word_base if is_word else partial_base
    length_bonus = min(0.05, len(name) / 400)
    return min(cap, base + length_bonus), ("word" if is_word else "partial")


def analyze_query_ontology_concepts(
    query: str,
    ontology_manager,
    *,
    debug: bool = False,
    use_fallback: bool = True,
) -> Tuple[List[Dict], List[Dict]]:
    """
    Analyze the query to identify ontology concepts with confidence scores.

    This is a simplified implementation that would be replaced with NLP in production.

    Args:
        query: Free-text user query; matching is case-insensitive.
        ontology_manager: Object exposing ``get_classes()`` and an
            ``ontology_data`` mapping with optional ``"classes"`` and
            ``"relationships"`` entries.
        debug: When true, write the intermediate "Debug - ..." messages to the
            page via ``st.write``. Off by default so diagnostics do not leak
            into the user-facing UI.
        use_fallback: When true (the default), return three hard-coded example
            mentions if nothing in the ontology matched, so callers always
            have something to display. These carry ``match_type ==
            "fallback"`` and are not derived from the query.

    Returns:
        A ``(entity_mentions, relationship_mentions)`` tuple. Entity dicts
        have ``type``, ``confidence``, ``description`` and ``match_type``;
        relationship dicts have ``name``, ``domain``, ``range``,
        ``confidence``, ``description`` and ``match_type``.
    """
    if debug:
        st.write(f"Debug - Analyzing query: '{query}'")

    query_lower = query.lower()
    ontology_data = ontology_manager.ontology_data

    # Materialise once so the debug preview can slice any iterable.
    classes = list(ontology_manager.get_classes())
    if debug:
        st.write(f"Debug - Available classes: {classes[:5]}...")

    # Entity detection: whole-word hits score 0.9, substring-only hits 0.7.
    class_catalog = ontology_data.get("classes", {})
    entity_mentions = []
    for class_name in classes:
        scored = _score_mention(class_name, query_lower, 0.9, 0.7, 0.95)
        if scored is None:
            continue
        confidence, match_type = scored
        entity_mentions.append({
            "type": class_name,
            "confidence": confidence,
            "description": class_catalog.get(class_name, {}).get("description", ""),
            "match_type": match_type
        })

    if debug:
        st.write(f"Debug - Found {len(entity_mentions)} entity mentions")

    # Relationship detection: same scheme with slightly lower scores
    # (0.85 / 0.65, capped at 0.9).
    relationship_mentions = []
    for rel in ontology_data.get("relationships", []):
        rel_name = rel["name"]
        scored = _score_mention(rel_name, query_lower, 0.85, 0.65, 0.9)
        if scored is None:
            continue
        confidence, match_type = scored
        relationship_mentions.append({
            "name": rel_name,
            "domain": rel["domain"],
            "range": rel["range"],
            "confidence": confidence,
            "description": rel.get("description", ""),
            "match_type": match_type
        })

    if debug:
        st.write(f"Debug - Found {len(relationship_mentions)} relationship mentions")

    # Hard-coded examples keep the UI populated when nothing was detected.
    if use_fallback and not entity_mentions and not relationship_mentions:
        if debug:
            st.write("Debug - No matches found, adding fallback examples")

        entity_mentions.append({
            "type": "Customer",
            "confidence": 0.8,
            "description": "A person or organization that purchases products or services",
            "match_type": "fallback"
        })
        entity_mentions.append({
            "type": "Product",
            "confidence": 0.75,
            "description": "An item offered for sale or use",
            "match_type": "fallback"
        })
        relationship_mentions.append({
            "name": "provides",
            "domain": "Customer",
            "range": "Feedback",
            "confidence": 0.7,
            "description": "Connects customers to their feedback submissions",
            "match_type": "fallback"
        })

    return entity_mentions, relationship_mentions
+# The rest of your visualization.py file, including the fixed display_reasoning_trace function
 def display_reasoning_trace(query: str, retrieved_docs: List[Dict], answer: str, ontology_manager):
     """Display an enhanced trace of how ontological reasoning was used to answer the query."""
     st.subheader("🧠 Ontology-Enhanced Reasoning")
             st.markdown(f"**{adv['title']}**")
             st.markdown(adv["description"])
+# The rest of the code remains the same as in your original visualization.py file
 def display_retrieval_flow(query: str, docs_by_source: Dict[str, List]):
     """Create a Sankey diagram showing the flow from query to sources to answer."""
     # Define node labels
             line=dict(color="black", width=0.5),
             label=nodes,
             color=["#1f77b4"] + [px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)]
+                for i in range(len(docs_by_source))] + ["#2ca02c"]
         ),
         link=dict(
             source=source_indices,
     if retrieved_docs:
         doc_count = len(retrieved_docs)
         ontology_count = sum(1 for doc in retrieved_docs if hasattr(doc, 'metadata') and
+                           doc.metadata.get('source', '') in ['ontology', 'ontology_context'])
         steps.append({
             "title": "Knowledge Retrieval",
         development or how customer feedback influences business strategy."""
     })
+    return advantages
 def render_html_in_streamlit(html_content: str):
     """Display HTML content in Streamlit using components.html."""