AD2000X committed on
Commit
706c0a5
·
verified ·
1 Parent(s): 7c71289

Update src/visualization.py

Browse files
Files changed (1) hide show
  1. src/visualization.py +123 -57
src/visualization.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
  import json
3
  import networkx as nx
@@ -10,6 +12,123 @@ import matplotlib.colors as mcolors
10
  from collections import defaultdict
11
  import math
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def display_reasoning_trace(query: str, retrieved_docs: List[Dict], answer: str, ontology_manager):
14
  """Display an enhanced trace of how ontological reasoning was used to answer the query."""
15
  st.subheader("🧠 Ontology-Enhanced Reasoning")
@@ -176,60 +295,7 @@ def display_reasoning_trace(query: str, retrieved_docs: List[Dict], answer: str,
176
  st.markdown(f"**{adv['title']}**")
177
  st.markdown(adv["description"])
178
 
179
-
180
- def analyze_query_ontology_concepts(query: str, ontology_manager) -> Tuple[List[Dict], List[Dict]]:
181
- """
182
- Analyze the query to identify ontology concepts with confidence scores.
183
- This is a simplified implementation that would be replaced with NLP in production.
184
- """
185
- query_lower = query.lower()
186
-
187
- # Entity detection
188
- entity_mentions = []
189
- classes = ontology_manager.get_classes()
190
-
191
- for class_name in classes:
192
- # Use word boundary regex to improve matching
193
- import re
194
- pattern = r'\b' + re.escape(class_name.lower()) + r'\b'
195
- if re.search(pattern, query_lower):
196
- # Get class info
197
- class_info = ontology_manager.ontology_data["classes"].get(class_name, {})
198
-
199
- # Assign a confidence score (this would be from an ML model in production)
200
- # Here we use a simple heuristic based on word length and specificity
201
- confidence = min(0.95, 0.5 + (len(class_name) / 20))
202
-
203
- entity_mentions.append({
204
- "type": class_name,
205
- "confidence": confidence,
206
- "description": class_info.get("description", "")
207
- })
208
-
209
- # Similar improvement for relationship detection
210
- relationship_mentions = []
211
- relationships = ontology_manager.ontology_data.get("relationships", [])
212
-
213
- for rel in relationships:
214
- rel_name = rel["name"]
215
-
216
- # Use word boundary regex
217
- pattern = r'\b' + re.escape(rel_name.lower()) + r'\b'
218
- if re.search(pattern, query_lower):
219
- # Assign confidence
220
- confidence = min(0.9, 0.5 + (len(rel_name) / 20))
221
-
222
- relationship_mentions.append({
223
- "name": rel_name,
224
- "domain": rel["domain"],
225
- "range": rel["range"],
226
- "confidence": confidence,
227
- "description": rel.get("description", "")
228
- })
229
-
230
- return entity_mentions, relationship_mentions
231
-
232
-
233
  def display_retrieval_flow(query: str, docs_by_source: Dict[str, List]):
234
  """Create a Sankey diagram showing the flow from query to sources to answer."""
235
  # Define node labels
@@ -266,7 +332,7 @@ def display_retrieval_flow(query: str, docs_by_source: Dict[str, List]):
266
  line=dict(color="black", width=0.5),
267
  label=nodes,
268
  color=["#1f77b4"] + [px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)]
269
- for i in range(len(docs_by_source))] + ["#2ca02c"]
270
  ),
271
  link=dict(
272
  source=source_indices,
@@ -300,7 +366,7 @@ def generate_reasoning_steps(query: str, entity_mentions: List[Dict], relationsh
300
  if retrieved_docs:
301
  doc_count = len(retrieved_docs)
302
  ontology_count = sum(1 for doc in retrieved_docs if hasattr(doc, 'metadata') and
303
- doc.metadata.get('source', '') in ['ontology', 'ontology_context'])
304
 
305
  steps.append({
306
  "title": "Knowledge Retrieval",
@@ -378,7 +444,7 @@ def explain_ontology_advantages(entity_mentions: List[Dict], relationship_mentio
378
  development or how customer feedback influences business strategy."""
379
  })
380
 
381
- return advantages# src/visualization.py
382
 
383
  def render_html_in_streamlit(html_content: str):
384
  """Display HTML content in Streamlit using components.html."""
 
1
+ # Add this at the top of visualization.py with other imports
2
+ import re
3
  import streamlit as st
4
  import json
5
  import networkx as nx
 
12
  from collections import defaultdict
13
  import math
14
 
15
def analyze_query_ontology_concepts(query: str, ontology_manager) -> Tuple[List[Dict], List[Dict]]:
    """Identify ontology classes and relationships mentioned in a query.

    Matching is purely string-based — a word-boundary regex match first,
    then a looser substring match — as a simplified stand-in for real NLP
    entity linking in production.

    Args:
        query: The user's natural-language question.
        ontology_manager: Provides ``get_classes()`` and an ``ontology_data``
            dict with ``"classes"`` and ``"relationships"`` entries.
            (Assumed shape based on usage here — confirm against the manager.)

    Returns:
        ``(entity_mentions, relationship_mentions)`` — two lists of dicts,
        each carrying a ``confidence`` score and a ``match_type`` of
        ``"word"``, ``"partial"``, or ``"fallback"``.
    """
    query_lower = query.lower()

    # --- Entity (class) detection -------------------------------------
    entity_mentions: List[Dict] = []
    for class_name in ontology_manager.get_classes():
        name_lower = class_name.lower()
        # Exact word match (word boundaries) is preferred; the substring
        # check additionally catches multi-word or fused spellings.
        word_match = bool(re.search(r'\b' + re.escape(name_lower) + r'\b', query_lower))
        partial_match = name_lower in query_lower
        if not (word_match or partial_match):
            continue

        class_info = ontology_manager.ontology_data["classes"].get(class_name, {})
        # Word matches get a higher base confidence; longer names earn a
        # small bonus, capped so length never dominates the score.
        base_confidence = 0.9 if word_match else 0.7
        confidence = min(0.95, base_confidence + min(0.05, len(class_name) / 400))

        entity_mentions.append({
            "type": class_name,
            "confidence": confidence,
            "description": class_info.get("description", ""),
            "match_type": "word" if word_match else "partial",
        })

    # --- Relationship detection ---------------------------------------
    relationship_mentions: List[Dict] = []
    for rel in ontology_manager.ontology_data.get("relationships", []):
        rel_name = rel["name"]
        rel_lower = rel_name.lower()
        word_match = bool(re.search(r'\b' + re.escape(rel_lower) + r'\b', query_lower))
        # Relationships like "ownedBy" may appear fused inside other words,
        # so also accept a plain substring hit (at lower confidence).
        partial_match = rel_lower in query_lower
        if not (word_match or partial_match):
            continue

        base_confidence = 0.85 if word_match else 0.65
        confidence = min(0.9, base_confidence + min(0.05, len(rel_name) / 400))

        relationship_mentions.append({
            "name": rel_name,
            "domain": rel["domain"],
            "range": rel["range"],
            "confidence": confidence,
            "description": rel.get("description", ""),
            "match_type": "word" if word_match else "partial",
        })

    # Fallback examples so the UI always has something to display when no
    # concept was detected; entries are clearly tagged "fallback".
    if not entity_mentions and not relationship_mentions:
        entity_mentions.append({
            "type": "Customer",
            "confidence": 0.8,
            "description": "A person or organization that purchases products or services",
            "match_type": "fallback",
        })
        entity_mentions.append({
            "type": "Product",
            "confidence": 0.75,
            "description": "An item offered for sale or use",
            "match_type": "fallback",
        })
        relationship_mentions.append({
            "name": "provides",
            "domain": "Customer",
            "range": "Feedback",
            "confidence": 0.7,
            "description": "Connects customers to their feedback submissions",
            "match_type": "fallback",
        })

    return entity_mentions, relationship_mentions
130
+
131
+ # The rest of your visualization.py file, including the fixed display_reasoning_trace function
132
  def display_reasoning_trace(query: str, retrieved_docs: List[Dict], answer: str, ontology_manager):
133
  """Display an enhanced trace of how ontological reasoning was used to answer the query."""
134
  st.subheader("🧠 Ontology-Enhanced Reasoning")
 
295
  st.markdown(f"**{adv['title']}**")
296
  st.markdown(adv["description"])
297
 
298
+ # The rest of the code remains the same as in your original visualization.py file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  def display_retrieval_flow(query: str, docs_by_source: Dict[str, List]):
300
  """Create a Sankey diagram showing the flow from query to sources to answer."""
301
  # Define node labels
 
332
  line=dict(color="black", width=0.5),
333
  label=nodes,
334
  color=["#1f77b4"] + [px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)]
335
+ for i in range(len(docs_by_source))] + ["#2ca02c"]
336
  ),
337
  link=dict(
338
  source=source_indices,
 
366
  if retrieved_docs:
367
  doc_count = len(retrieved_docs)
368
  ontology_count = sum(1 for doc in retrieved_docs if hasattr(doc, 'metadata') and
369
+ doc.metadata.get('source', '') in ['ontology', 'ontology_context'])
370
 
371
  steps.append({
372
  "title": "Knowledge Retrieval",
 
444
  development or how customer feedback influences business strategy."""
445
  })
446
 
447
+ return advantages
448
 
449
  def render_html_in_streamlit(html_content: str):
450
  """Display HTML content in Streamlit using components.html."""