import streamlit as st

st.set_page_config(page_title="Ontology RAG Demo", layout="wide")

import os
import json
import time

import networkx as nx
import pandas as pd
import plotly.express as px
from openai import OpenAI

from src.semantic_retriever import SemanticRetriever
from src.ontology_manager import OntologyManager
from src.knowledge_graph import KnowledgeGraph
from src.visualization import (display_ontology_stats, display_entity_details,
                               display_graph_visualization, visualize_path,
                               display_reasoning_trace, render_html_in_streamlit)

# Setup: LLM client, ontology manager, retriever, and knowledge graph
llm = OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
ontology_manager = OntologyManager("data/enterprise_ontology.json")
semantic_retriever = SemanticRetriever(ontology_manager=ontology_manager)
knowledge_graph = KnowledgeGraph(ontology_manager=ontology_manager)

# Shared retrieval depth for both RAG pipelines
k_val = st.sidebar.slider("Top K Results", 1, 10, 3)
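# Note: the resources above are rebuilt on every Streamlit rerun. Below is a minimal
# sketch (not wired into the app) of how they could be cached with st.cache_resource
# instead, assuming these classes are safe to reuse across reruns.
@st.cache_resource
def load_resources():
    manager = OntologyManager("data/enterprise_ontology.json")
    return manager, SemanticRetriever(ontology_manager=manager), KnowledgeGraph(ontology_manager=manager)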
def main():
    # Page navigation (labels must match the branches below exactly)
    st.sidebar.title("Page Navigation")
    page = st.sidebar.selectbox(
        "Select a page",
        ["RAG Comparison Demo", "Knowledge Graph Visualization", "Ontology Structure Analysis",
         "Entity Exploration", "Semantic Path Visualization", "Inference Tracking",
         "Detailed Comparison"]
    )
    if page == "RAG Comparison Demo":
        run_rag_demo()
    elif page == "Knowledge Graph Visualization":
        run_knowledge_graph_visualization()
    elif page == "Ontology Structure Analysis":
        run_ontology_structure_analysis()
    elif page == "Entity Exploration":
        run_entity_exploration()
    elif page == "Semantic Path Visualization":
        run_semantic_path_visualization()
    elif page == "Inference Tracking":
        run_reasoning_trace()
    elif page == "Detailed Comparison":
        run_detailed_comparison()
def run_rag_demo():
    st.title("Ontology-Enhanced RAG Demonstration")
    query = st.text_input(
        "Enter a question to compare RAG methods:",
        "How does customer feedback influence product development?"
    )
    if query:
        col1, col2 = st.columns(2)
        with st.spinner("Running both RAG methods..."):
            # Traditional RAG
            with col1:
                st.subheader("Traditional RAG")
                vector_docs = semantic_retriever.vector_store.similarity_search(query, k=k_val)
                vector_context = "\n\n".join([doc.page_content for doc in vector_docs])
                vector_messages = [
                    {"role": "system", "content": f"You are an enterprise knowledge assistant...\nContext:\n{vector_context}"},
                    {"role": "user", "content": query}
                ]
                vector_response = llm.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=vector_messages
                )
                vector_answer = vector_response.choices[0].message.content
                st.markdown("#### Answer")
                st.write(vector_answer)
                st.markdown("#### Retrieved context")
                for i, doc in enumerate(vector_docs):
                    with st.expander(f"Source {i+1}"):
                        st.code(doc.page_content)
            # Ontology-enhanced RAG
            with col2:
                st.subheader("Ontology-Enhanced RAG")
                result = semantic_retriever.retrieve_with_paths(query, k=k_val)
                retrieved_docs = result["documents"]
                enhanced_context = "\n\n".join([doc.page_content for doc in retrieved_docs])
                enhanced_messages = [
                    {"role": "system", "content": f"You are an enterprise knowledge assistant with ontology access rights...\nContext:\n{enhanced_context}"},
                    {"role": "user", "content": query}
                ]
                enhanced_response = llm.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=enhanced_messages
                )
                enhanced_answer = enhanced_response.choices[0].message.content
                st.markdown("#### Answer")
                st.write(enhanced_answer)
                st.markdown("#### Retrieved context")
                for i, doc in enumerate(retrieved_docs):
                    source = doc.metadata.get("source", "unknown")
                    label = {
                        "ontology": "Ontology context",
                        "text": "Text context",
                        "ontology_context": "Semantic context",
                        "semantic_path": "Relationship path"
                    }.get(source, "Source")
                    with st.expander(f"{label} {i+1}"):
                        st.markdown(doc.page_content)
        # Store the results for the reasoning trace visualization
        st.session_state.query = query
        st.session_state.retrieved_docs = retrieved_docs
        st.session_state.answer = enhanced_answer
        # Difference analysis
        st.markdown("---")
        st.subheader("Difference Analysis")
        st.markdown("""
The comparison above demonstrates several key advantages of ontology-enhanced RAG:

1. **Structure awareness**: The ontology-enhanced method understands the relationships between entities, not just their textual similarity.
2. **Multi-hop reasoning**: Using the knowledge graph structure, the enhanced method can connect information across multiple relational hops.
3. **Context enrichment**: The ontology supplies additional context about entity types, attributes, and relationships that is not explicit in the text.
4. **Reasoning ability**: Structured knowledge allows logical inferences that vector similarity alone cannot make.

Try more complex queries that require an understanding of relationships to see the differences more clearly!
""")
def run_knowledge_graph_visualization():
    st.title("Knowledge Graph Visualization")
    # Check whether a central entity has been selected
    central_entity = st.session_state.get("central_entity", None)
    # Display the interactive graph visualization
    display_graph_visualization(knowledge_graph, central_entity=central_entity, max_distance=2)
    # Fetch and display graph statistics
    graph_stats = knowledge_graph.get_graph_statistics()
    if graph_stats:
        st.subheader("Graph Statistics")
        col1, col2, col3, col4 = st.columns(4)
        col1.metric("Total nodes", graph_stats.get("node_count", 0))
        col2.metric("Total edges", graph_stats.get("edge_count", 0))
        col3.metric("Total classes", graph_stats.get("class_count", 0))
        col4.metric("Total instances", graph_stats.get("instance_count", 0))
        # Display the most central nodes
        if "central_nodes" in graph_stats and graph_stats["central_nodes"]:
            st.subheader("Central Nodes (by Betweenness Centrality)")
            central_nodes = graph_stats["central_nodes"]["betweenness"]
            nodes_df = []
            for node_info in central_nodes:
                node_id = node_info["node"]
                node_data = knowledge_graph.graph.nodes.get(node_id, {})
                node_type = node_data.get("type", "unknown")
                if node_type == "instance":
                    node_class = node_data.get("class_type", "unknown")
                    properties = node_data.get("properties", {})
                    name = properties.get("name", node_id)
                    nodes_df.append({
                        "ID": node_id,
                        "Name": name,
                        "Type": node_class,
                        "Centrality": node_info["centrality"]
                    })
            st.table(nodes_df)
def run_ontology_structure_analysis():
    st.title("Ontology Structure Analysis")
    # Reuse the existing ontology statistics display
    display_ontology_stats(ontology_manager)
    # Additional class hierarchy visualization
    st.subheader("Class Hierarchy")
    # Get the class hierarchy data
    class_hierarchy = ontology_manager.get_class_hierarchy()
    # Build a NetworkX graph representing the class hierarchy
    G = nx.DiGraph()
    # Add nodes and edges
    for parent, children in class_hierarchy.items():
        if not G.has_node(parent):
            G.add_node(parent)
        for child in children:
            G.add_node(child)
            G.add_edge(parent, child)
    # Only render when there is more than one node
    if len(G.nodes) > 1:
        # Generate the HTML visualization via the knowledge graph class
        kg = KnowledgeGraph(ontology_manager)
        html = kg.generate_html_visualization(
            include_classes=True,
            include_instances=False,
            max_distance=5,
            layout_algorithm="hierarchical"
        )
        # Render the HTML
        render_html_in_streamlit(html)
def run_entity_exploration():
    st.title("Entity Exploration")
    # Collect all entities
    entities = []
    for class_name in ontology_manager.get_classes():
        entities.extend(ontology_manager.get_instances_of_class(class_name))
    # Remove duplicates and sort
    entities = sorted(set(entities))
    # Entity drop-down selector
    selected_entity = st.selectbox("Select entity", entities)
    if selected_entity:
        # Fetch entity information
        entity_info = ontology_manager.get_entity_info(selected_entity)
        # Display the details
        display_entity_details(entity_info, ontology_manager)
        # Set this entity as the central entity for the knowledge graph view
        if st.button("View this entity in the knowledge graph"):
            st.session_state.central_entity = selected_entity
            st.rerun()
        # Fetch and display the entity's neighborhood
        st.subheader("Entity Neighborhood")
        max_distance = st.slider("Maximum neighborhood distance", 1, 3, 1)
        neighborhood = knowledge_graph.get_entity_neighborhood(
            selected_entity,
            max_distance=max_distance,
            include_classes=True
        )
        if neighborhood and "neighbors" in neighborhood:
            # Display neighbors grouped by distance
            for distance in range(1, max_distance + 1):
                neighbors_at_distance = [n for n in neighborhood["neighbors"] if n["distance"] == distance]
                if neighbors_at_distance:
                    with st.expander(f"Neighbors at distance {distance} ({len(neighbors_at_distance)})"):
                        for neighbor in neighbors_at_distance:
                            st.markdown(f"**{neighbor['id']}** ({neighbor.get('class_type', 'unknown')})")
                            # Display relations
                            for relation in neighbor.get("relations", []):
                                direction = "→" if relation["direction"] == "outgoing" else "←"
                                st.markdown(f"- {direction} {relation['type']}")
                            st.markdown("---")
def run_semantic_path_visualization():
    st.title("Semantic Path Visualization")
    # Collect all entities
    entities = []
    for class_name in ontology_manager.get_classes():
        entities.extend(ontology_manager.get_instances_of_class(class_name))
    # Remove duplicates and sort
    entities = sorted(set(entities))
    # Two columns for selecting the source and target entities
    col1, col2 = st.columns(2)
    with col1:
        source_entity = st.selectbox("Select source entity", entities, key="source")
    with col2:
        target_entity = st.selectbox("Select target entity", entities, key="target")
    if source_entity and target_entity and source_entity != target_entity:
        # Maximum path length option
        max_length = st.slider("Maximum path length", 1, 5, 3)
        # Find paths between the two entities
        paths = knowledge_graph.find_paths_between_entities(
            source_entity,
            target_entity,
            max_length=max_length
        )
        if paths:
            st.success(f"Found {len(paths)} paths!")
            # One expander per path
            for i, path in enumerate(paths):
                # Path length and relationship types
                path_length = len(path)
                rel_types = [edge["type"] for edge in path]
                with st.expander(f"Path {i+1} (length: {path_length}, relations: {', '.join(rel_types)})", expanded=(i == 0)):
                    # Build a textual description of the path
                    path_text = []
                    entities_in_path = []
                    for edge in path:
                        source = edge["source"]
                        target = edge["target"]
                        relation = edge["type"]
                        entities_in_path.append(source)
                        entities_in_path.append(target)
                        # Look up human-readable names for the entities
                        source_info = ontology_manager.get_entity_info(source)
                        target_info = ontology_manager.get_entity_info(target)
                        source_name = source
                        if "properties" in source_info and "name" in source_info["properties"]:
                            source_name = source_info["properties"]["name"]
                        target_name = target
                        if "properties" in target_info and "name" in target_info["properties"]:
                            target_name = target_info["properties"]["name"]
                        path_text.append(f"{source_name} ({source}) **{relation}** {target_name} ({target})")
                    # Display the path description
                    st.markdown(" → ".join(path_text))
                    # Prepare the path visualization
                    path_info = {
                        "source": source_entity,
                        "target": target_entity,
                        "path": path,
                        "text": " → ".join(path_text)
                    }
                    # Display the path visualization
                    visualize_path(path_info, ontology_manager)
        else:
            st.warning(f"No path of length {max_length} or shorter was found between these entities.")
def run_reasoning_trace():
    st.title("Inference Tracking Visualization")
    if not st.session_state.get("query") or not st.session_state.get("retrieved_docs") or not st.session_state.get("answer"):
        st.warning("Please run a query on the RAG comparison page first to generate inference trace data.")
        return
    # Pull data from session state
    query = st.session_state.query
    retrieved_docs = st.session_state.retrieved_docs
    answer = st.session_state.answer
    # Show the inference trace
    display_reasoning_trace(query, retrieved_docs, answer, ontology_manager)
def run_detailed_comparison():
    st.title("Detailed Comparison of RAG Methods")
    # Preset comparison queries
    comparison_queries = [
        "How does customer feedback influence product development?",
        "Which employees work in the Engineering department?",
        "What are the product life cycle stages?",
        "How do managers monitor employee performance?",
        "What are the responsibilities of the marketing department?"
    ]
    selected_query = st.selectbox(
        "Select a comparison query",
        comparison_queries,
        index=0
    )
    custom_query = st.text_input("Or enter a custom query:", "")
    query = custom_query if custom_query else selected_query
    if st.button("Compare RAG methods"):
        with st.spinner("Running detailed comparison..."):
            # Time the traditional RAG pipeline
            start_time = time.time()
            vector_docs = semantic_retriever.vector_store.similarity_search(query, k=k_val)
            vector_context = "\n\n".join([doc.page_content for doc in vector_docs])
            vector_messages = [
                {"role": "system", "content": f"You are an enterprise knowledge assistant...\nContext:\n{vector_context}"},
                {"role": "user", "content": query}
            ]
            vector_response = llm.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=vector_messages
            )
            vector_answer = vector_response.choices[0].message.content
            vector_time = time.time() - start_time
            # Reset the timer and run the ontology-enhanced RAG pipeline
            start_time = time.time()
            result = semantic_retriever.retrieve_with_paths(query, k=k_val)
            retrieved_docs = result["documents"]
            enhanced_context = "\n\n".join([doc.page_content for doc in retrieved_docs])
            enhanced_messages = [
                {"role": "system", "content": f"You are an enterprise knowledge assistant with ontology access rights...\nContext:\n{enhanced_context}"},
                {"role": "user", "content": query}
            ]
            enhanced_response = llm.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=enhanced_messages
            )
            enhanced_answer = enhanced_response.choices[0].message.content
            enhanced_time = time.time() - start_time
            # Save the results for the reasoning trace visualization
            st.session_state.query = query
            st.session_state.retrieved_docs = retrieved_docs
            st.session_state.answer = enhanced_answer
            # Display the comparison results in tabs
            st.subheader("Comparison Results")
            tab1, tab2, tab3, tab4 = st.tabs(["Answer Comparison", "Performance Metrics", "Retrieval Source Comparison", "Context Quality"])
            with tab1:
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown("#### Traditional RAG answer")
                    st.write(vector_answer)
                with col2:
                    st.markdown("#### Ontology-enhanced RAG answer")
                    st.write(enhanced_answer)
            with tab2:
                # Performance metrics
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Traditional RAG response time", f"{vector_time:.2f}s")
                    # Simple text metrics (whitespace-token counts)
                    vector_tokens = len(vector_context.split())
                    st.metric("Retrieved context tokens", vector_tokens)
                    st.metric("Retrieved documents", len(vector_docs))
                with col2:
                    st.metric("Ontology-enhanced RAG response time", f"{enhanced_time:.2f}s")
                    enhanced_tokens = len(enhanced_context.split())
                    st.metric("Retrieved context tokens", enhanced_tokens)
                    st.metric("Retrieved documents", len(retrieved_docs))
                # Performance comparison chart
                performance_data = {
                    "Metric": ["Response time (s)", "Context tokens", "Retrieved documents"],
                    "Traditional RAG": [vector_time, vector_tokens, len(vector_docs)],
                    "Ontology Enhanced RAG": [enhanced_time, enhanced_tokens, len(retrieved_docs)]
                }
                df = pd.DataFrame(performance_data)
                # Grouped Plotly bar chart
                fig = px.bar(
                    df,
                    x="Metric",
                    y=["Traditional RAG", "Ontology Enhanced RAG"],
                    barmode="group",
                    title="Performance Metric Comparison",
                    labels={"value": "Value", "variable": "RAG method"}
                )
                st.plotly_chart(fig)
            with tab3:
                # Retrieval source comparison
                enhanced_sources = []
                for doc in retrieved_docs:
                    source = doc.metadata.get("source", "unknown")
                    label = {
                        "ontology": "Ontology context",
                        "text": "Text context",
                        "ontology_context": "Semantic context",
                        "semantic_path": "Relationship path"
                    }.get(source, "Unknown source")
                    enhanced_sources.append(label)
                # Count documents per source type
                source_counts = {}
                for source in enhanced_sources:
                    source_counts[source] = source_counts.get(source, 0) + 1
                source_df = pd.DataFrame({
                    "Source type": list(source_counts.keys()),
                    "Number of documents": list(source_counts.values())
                })
                fig = px.pie(
                    source_df,
                    values="Number of documents",
                    names="Source type",
                    title="Ontology-enhanced RAG retrieval source distribution"
                )
                st.plotly_chart(fig)
                # Explain how the sources relate to the answer
                st.subheader("Relationship between sources and the answer")
                st.markdown("""
The ontology-enhanced method draws on multiple knowledge sources to construct a more comprehensive answer. The chart above shows how the retrieved documents are distributed across those sources.

In particular, semantic context and relationship paths provide knowledge that traditional vector retrieval cannot capture, enabling the system to connect concepts and perform multi-hop reasoning.
""")
            with tab4:
                # Context quality assessment
                st.subheader("Context Quality Assessment")
                # A simplified, heuristic evaluation of the retrieved context
                def evaluate_context(docs):
                    metrics = {
                        "Direct relevance": 0,
                        "Semantic richness": 0,
                        "Structural information": 0,
                        "Relational information": 0
                    }
                    for doc in docs:
                        content = doc.page_content if hasattr(doc, "page_content") else ""
                        # Direct relevance: keyword overlap with the query
                        if any(kw in content.lower() for kw in query.lower().split()):
                            metrics["Direct relevance"] += 1
                        # Semantic richness: based on text length
                        metrics["Semantic richness"] += min(1, len(content.split()) / 50)
                        # Structural information: documents drawn from the ontology
                        if hasattr(doc, "metadata") and doc.metadata.get("source") in ["ontology", "ontology_context"]:
                            metrics["Structural information"] += 1
                        # Relational information: documents derived from semantic paths
                        if hasattr(doc, "metadata") and doc.metadata.get("source") == "semantic_path":
                            metrics["Relational information"] += 1
                    # Cap each metric at 10
                    for key in metrics:
                        metrics[key] = min(10, metrics[key])
                    return metrics
                # Evaluate both methods
                vector_metrics = evaluate_context(vector_docs)
                enhanced_metrics = evaluate_context(retrieved_docs)
                # Build a comparative radar chart
                metrics_df = pd.DataFrame({
                    "Metric": list(vector_metrics.keys()),
                    "Traditional RAG": list(vector_metrics.values()),
                    "Ontology Enhanced RAG": list(enhanced_metrics.values())
                })
                # px.line_polar expects long-form data, so melt the two method columns
                metrics_long = metrics_df.melt(
                    id_vars="Metric",
                    var_name="RAG method",
                    value_name="Score"
                )
                fig = px.line_polar(
                    metrics_long,
                    r="Score",
                    theta="Metric",
                    color="RAG method",
                    line_close=True,
                    range_r=[0, 10],
                    title="Context Quality Comparison"
                )
                st.plotly_chart(fig)
                st.markdown("""
The chart above compares the two RAG methods on context quality. Ontology-enhanced RAG scores higher along several dimensions:

1. **Direct relevance**: how closely the retrieved content matches the query
2. **Semantic richness**: the information density of the retrieved context
3. **Structural information**: structured knowledge about entity types, attributes, and relationships
4. **Relational information**: explicit relationships and connection paths between entities

The advantage of ontology-enhanced RAG is that it can retrieve structured knowledge and relational information, both of which are missing from traditional RAG.
""")
    # Static analysis of the two methods (shown regardless of whether a comparison has been run)
    st.subheader("Method Analysis")
    with st.expander("Strengths and weaknesses", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("#### Traditional RAG")
            st.markdown("""
**Strengths**:
- Simple to implement with a light computational footprint
- Works well on unstructured text
- Usually faster response times

**Weaknesses**:
- Cannot capture relationships between entities
- Lacks structured knowledge context
- Struggles with multi-hop reasoning
- Retrieval is driven mainly by text similarity
""")
        with col2:
            st.markdown("#### Ontology-Enhanced RAG")
            st.markdown("""
**Strengths**:
- Understands relationships and connections between entities
- Provides rich, structured knowledge context
- Supports multi-hop reasoning and path discovery
- Combines vector similarity with semantic relationships

**Weaknesses**:
- Higher implementation complexity
- Requires maintaining the ontology model
- Relatively high computational overhead
- Retrieval and inference can take longer
""")
    # Suggested usage scenarios
    with st.expander("When to use each method"):
        st.markdown("""
### When traditional RAG fits
- Simple fact lookups
- Unstructured document retrieval
- Applications with strict response-time requirements
- When the document content is clear and direct

### When ontology-enhanced RAG fits
- Complex knowledge-association queries
- Questions that require understanding relationships between entities
- Applications that need cross-domain reasoning
- Enterprise knowledge management systems
- Reasoning scenarios demanding high accuracy and consistency
- Applications that need to surface implicit knowledge
""")
    # Practical application examples
    with st.expander("Example applications"):
        st.markdown("""
### Enterprise knowledge management
An ontology-enhanced RAG system helps an enterprise organize and access its knowledge assets, connect information across departments and systems, and deliver more comprehensive business insights.

### Product development decision support
By understanding the relationships between customer feedback, product features, and market data, the system can better support product development decisions.

### Complex compliance queries
For compliance questions involving multiple rules and relationships, ontology-enhanced RAG can provide rule-based reasoning and help ensure recommendations satisfy all applicable policies and regulations.

### Diagnostics and troubleshooting
In technical support and troubleshooting scenarios, the system can connect symptoms, causes, and solutions through multi-hop reasoning to produce more accurate diagnoses.
""")