<!--
pierre-livetrend's picture
Add 2 files
ac0d9c6 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Document metadata. The charset declaration stays first so the parser sees it early. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Comprehensive RAG Process Diagram</title>
<meta name="description" content="Diagram of a Retrieval-Augmented Generation (RAG) system: document processing, query processing, and response generation.">
<!-- Tailwind Play CDN: provides the utility classes (bg-*, p-*, grid, md:*, ...) used in the markup. -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- Font Awesome 6.4.0: provides the "fas fa-*" icon classes used in the markup. -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
/* Gentle vertical bob used by the circular step icons (.floating). */
@keyframes float {
  0%, 100% { transform: translateY(0); }
  50% { transform: translateY(-10px); }
}
.floating {
  animation: float 3s ease-in-out infinite;
}
/* The bob loops forever and has no pause control, so switch it off for
   users who have asked their system for reduced motion. */
@media (prefers-reduced-motion: reduce) {
  .floating {
    animation: none;
  }
}

/* Horizontal gradient helpers, one per pipeline colour range.
   NOTE(review): no element in the visible markup uses these three classes;
   kept in case they are referenced elsewhere. */
.document-flow {
  background: linear-gradient(90deg, #3b82f6, #8b5cf6);
}
.query-flow {
  background: linear-gradient(90deg, #8b5cf6, #f59e0b);
}
.response-flow {
  background: linear-gradient(90deg, #f59e0b, #ef4444);
}

/* Thin multi-colour rule placed between the pipeline sections. */
.section-divider {
  height: 2px;
  background: linear-gradient(90deg, #3b82f6, #8b5cf6, #f59e0b, #ef4444);
}

/* Step-to-step arrow. The card is the positioning context and the arrow is
   drawn in the gutter to its right, vertically centred. */
.arrow-connector {
  position: relative;
}
.arrow-connector:after {
  content: "→";
  position: absolute;
  right: -30px;
  top: 50%;
  transform: translateY(-50%);
  font-size: 24px;
  color: #4f46e5;
}
/* Below the md breakpoint (768px) the step grids are single-column
   (grid-cols-1), so the next step sits underneath, not to the right.
   Point the arrow down and centre it in the 24px row gap (gap-6). */
@media (max-width: 767.98px) {
  .arrow-connector:after {
    content: "↓";
    top: auto;
    right: auto;
    bottom: -24px;
    left: 50%;
    transform: translateX(-50%);
    line-height: 1;
  }
}
/* Final card of a row: never show a trailing arrow. */
.last-arrow:after {
  display: none;
}

/* Small call-out box holding implementation details inside a step card. */
.tech-note {
  border-left: 4px solid #6366f1;
  background-color: #f8fafc;
}
/* Dashed green outline that sets the vector database card apart. */
.vector-db {
  border: 2px dashed #10b981;
}
</style>
</head>
<body class="bg-gray-50 min-h-screen flex items-center justify-center p-4">
<div class="max-w-7xl w-full bg-white rounded-xl shadow-2xl overflow-hidden">
<!-- Header: page title and one-line summary on a gradient banner.
     Uses <header> so the banner is exposed as a landmark. -->
<header class="bg-gradient-to-r from-indigo-500 to-purple-600 p-6">
  <h1 class="text-3xl font-bold text-white text-center">Retrieval-Augmented Generation (RAG) System</h1>
  <p class="text-indigo-100 text-center mt-2">Complete workflow from document ingestion to response generation</p>
</header>
<!-- Main Diagram -->
<div class="p-8">
<!-- Document Processing Pipeline: five step cards, from input files to the vector store.
     The <section> is named by its heading. Every icon is decorative (each card
     has a text heading), so icons carry aria-hidden="true". -->
<section class="mb-12" aria-labelledby="document-pipeline-heading">
  <div class="flex items-center mb-6">
    <div class="w-4 h-8 bg-blue-500 rounded mr-3"></div>
    <h2 class="text-2xl font-bold text-gray-800" id="document-pipeline-heading">1. Document Processing Pipeline</h2>
  </div>
  <div class="grid grid-cols-1 md:grid-cols-5 gap-6 mb-8">
    <!-- Input -->
    <div class="bg-blue-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-blue-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-file-alt text-2xl text-blue-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Input Documents</h3>
      <div class="flex justify-center space-x-2">
        <div class="bg-white p-2 rounded shadow">
          <i class="fas fa-file-pdf text-red-500" aria-hidden="true"></i>
          <span class="text-xs">PDF</span>
        </div>
        <div class="bg-white p-2 rounded shadow">
          <i class="fas fa-file-alt text-blue-500" aria-hidden="true"></i>
          <span class="text-xs">TXT</span>
        </div>
      </div>
    </div>
    <!-- Loading -->
    <div class="bg-blue-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-blue-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-download text-2xl text-blue-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Document Loading</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          <span class="font-semibold">TextFileLoader</span> and <span class="font-semibold">PDFFileLoader</span> handle different formats
        </p>
      </div>
    </div>
    <!-- Splitting -->
    <div class="bg-blue-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-blue-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-cut text-2xl text-blue-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Text Splitting</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          <span class="font-semibold">CharacterTextSplitter</span><br>
          Chunk size: 1000 chars<br>
          Overlap: 200 chars
        </p>
      </div>
    </div>
    <!-- Embedding -->
    <div class="bg-blue-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-blue-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-cube text-2xl text-blue-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Embedding Generation</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          <span class="font-semibold">text-embedding-3-small</span><br>
          1536-dimensional vectors
        </p>
      </div>
    </div>
    <!-- Vector Storage: last card in the row, so no trailing arrow -->
    <div class="bg-blue-50 p-6 rounded-lg vector-db last-arrow">
      <div class="w-16 h-16 rounded-full bg-green-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-database text-2xl text-green-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Vector Database</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          Dictionary of numpy arrays<br>
          <span class="font-semibold">Async processing</span> for performance
        </p>
      </div>
    </div>
  </div>
</section>
<div class="section-divider my-12"></div>
<!-- Query Processing Pipeline: four step cards, from the user's question to the retrieved chunks.
     The <section> is named by its heading. Every icon is decorative (each card
     has a text heading), so icons carry aria-hidden="true". -->
<section class="mb-12" aria-labelledby="query-pipeline-heading">
  <div class="flex items-center mb-6">
    <div class="w-4 h-8 bg-purple-500 rounded mr-3"></div>
    <h2 class="text-2xl font-bold text-gray-800" id="query-pipeline-heading">2. Query Processing Pipeline</h2>
  </div>
  <div class="grid grid-cols-1 md:grid-cols-4 gap-6 mb-8">
    <!-- User Query -->
    <div class="bg-purple-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-purple-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-question text-2xl text-purple-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">User Query</h3>
      <div class="bg-white p-3 rounded shadow mt-2">
        <p class="text-sm text-gray-700">"What is RAG?"</p>
      </div>
    </div>
    <!-- Query Embedding -->
    <div class="bg-purple-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-purple-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-cube text-2xl text-purple-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Query Embedding</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          Same embedding model as documents
        </p>
      </div>
    </div>
    <!-- Similarity Search -->
    <div class="bg-purple-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-purple-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-search text-2xl text-purple-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Similarity Search</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          <span class="font-semibold">Cosine similarity</span> as distance metric
        </p>
      </div>
    </div>
    <!-- Retrieval: last card in the row, so no trailing arrow -->
    <div class="bg-purple-50 p-6 rounded-lg last-arrow">
      <div class="w-16 h-16 rounded-full bg-purple-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-book-open text-2xl text-purple-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Retrieval</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          Top <span class="font-semibold">k=4</span> most relevant chunks
        </p>
      </div>
    </div>
  </div>
</section>
<div class="section-divider my-12"></div>
<!-- Response Generation Pipeline: four step cards, from prompt assembly to the final answer.
     The <section> is named by its heading. Every icon is decorative (each card
     has a text heading), so icons carry aria-hidden="true". -->
<section class="mb-8" aria-labelledby="response-pipeline-heading">
  <div class="flex items-center mb-6">
    <div class="w-4 h-8 bg-orange-500 rounded mr-3"></div>
    <h2 class="text-2xl font-bold text-gray-800" id="response-pipeline-heading">3. Response Generation Pipeline</h2>
  </div>
  <div class="grid grid-cols-1 md:grid-cols-4 gap-6">
    <!-- Prompt Construction -->
    <div class="bg-orange-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-orange-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-comment-alt text-2xl text-orange-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Prompt Construction</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          <span class="font-semibold">System:</span> "Use the provided context..."
        </p>
        <p class="text-xs text-gray-600 mt-1">
          <span class="font-semibold">User:</span> Query + Retrieved Context
        </p>
      </div>
    </div>
    <!-- LLM Processing -->
    <div class="bg-orange-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-orange-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-brain text-2xl text-orange-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">LLM Processing</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          <span class="font-semibold">gpt-4o-mini</span><br>
          Zero-shot in-context learning
        </p>
      </div>
    </div>
    <!-- Response Generation -->
    <div class="bg-orange-50 p-6 rounded-lg arrow-connector">
      <div class="w-16 h-16 rounded-full bg-orange-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-robot text-2xl text-orange-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Response Generation</h3>
      <div class="bg-white p-3 rounded shadow mt-2">
        <p class="text-sm text-gray-700">"RAG combines retrieval with generation..."</p>
      </div>
    </div>
    <!-- Final Answer: last card in the row, so no trailing arrow -->
    <div class="bg-red-50 p-6 rounded-lg last-arrow">
      <div class="w-16 h-16 rounded-full bg-red-100 flex items-center justify-center mb-4 mx-auto floating">
        <i class="fas fa-user text-2xl text-red-600" aria-hidden="true"></i>
      </div>
      <h3 class="font-semibold text-lg text-gray-800 mb-2 text-center">Final Answer</h3>
      <div class="tech-note p-3 rounded mt-2">
        <p class="text-xs text-gray-600">
          Factual response grounded in retrieved context
        </p>
      </div>
    </div>
  </div>
</section>
<!-- Enhancement Section: three cards describing how PDF input is handled.
     The <section> is named by its heading; the icons are decorative and
     hidden from assistive technology. -->
<section class="mt-16 p-6 bg-gray-50 rounded-lg border border-gray-200" aria-labelledby="pdf-enhancement-heading">
  <h2 class="text-xl font-bold text-gray-800 mb-4 flex items-center" id="pdf-enhancement-heading">
    <i class="fas fa-star text-yellow-500 mr-2" aria-hidden="true"></i>
    PDF Processing Enhancement
  </h2>
  <div class="grid grid-cols-1 md:grid-cols-3 gap-6">
    <!-- File type detection -->
    <div class="bg-white p-4 rounded-lg shadow">
      <div class="flex items-center mb-3">
        <div class="w-10 h-10 rounded-full bg-blue-100 flex items-center justify-center mr-3">
          <i class="fas fa-file-pdf text-blue-500" aria-hidden="true"></i>
        </div>
        <h3 class="font-semibold">File Type Detection</h3>
      </div>
      <p class="text-sm text-gray-600">DocumentLoader routes PDFs to PDFFileLoader based on file extension</p>
    </div>
    <!-- Page-by-page processing -->
    <div class="bg-white p-4 rounded-lg shadow">
      <div class="flex items-center mb-3">
        <div class="w-10 h-10 rounded-full bg-purple-100 flex items-center justify-center mr-3">
          <i class="fas fa-file-alt text-purple-500" aria-hidden="true"></i>
        </div>
        <h3 class="font-semibold">Page-by-Page Processing</h3>
      </div>
      <p class="text-sm text-gray-600">PDFs are processed one page at a time with page markers for reference</p>
    </div>
    <!-- Content combination -->
    <div class="bg-white p-4 rounded-lg shadow">
      <div class="flex items-center mb-3">
        <div class="w-10 h-10 rounded-full bg-green-100 flex items-center justify-center mr-3">
          <i class="fas fa-code-merge text-green-500" aria-hidden="true"></i>
        </div>
        <h3 class="font-semibold">Content Combination</h3>
      </div>
      <p class="text-sm text-gray-600">Processed pages are combined into a single document before splitting</p>
    </div>
  </div>
</section>
</div>
<!-- Footer: one-line tagline. Uses <footer> so it is exposed as a landmark. -->
<footer class="bg-gray-100 p-4 text-center">
  <p class="text-sm text-gray-600">RAG System Implementation - Combining retrieval with generation for factual responses</p>
</footer>
</div>
<script>
// Reveal-on-scroll for the cards: every direct <div> child of a ".grid"
// container starts transparent and shifted 20px down, then fades and slides
// into place the first time at least 10% of it is inside the viewport.
document.addEventListener('DOMContentLoaded', function () {
  const cards = document.querySelectorAll('.grid > div');

  // Leave the cards fully visible when the effect cannot or should not run:
  // the browser has no IntersectionObserver, or the user has asked the
  // system for reduced motion.
  const prefersReducedMotion =
    typeof window.matchMedia === 'function' &&
    window.matchMedia('(prefers-reduced-motion: reduce)').matches;
  if (!('IntersectionObserver' in window) || prefersReducedMotion) {
    return;
  }

  const revealObserver = new IntersectionObserver((entries, observer) => {
    entries.forEach((entry) => {
      if (!entry.isIntersecting) {
        return;
      }
      entry.target.style.opacity = '1';
      entry.target.style.transform = 'translateY(0)';
      // The reveal is one-way, so stop watching the card once it has fired.
      observer.unobserve(entry.target);
    });
  }, { threshold: 0.1 });

  cards.forEach((card) => {
    // Hidden starting state; the transition animates the change made above.
    card.style.opacity = '0';
    card.style.transform = 'translateY(20px)';
    card.style.transition = 'opacity 0.5s ease, transform 0.5s ease';
    revealObserver.observe(card);
  });
});
</script>
<!-- "Made with DeepSite" attribution badge, pinned to the bottom-left corner of the viewport.
     Both links open in a new tab, so they carry rel="noopener" to keep the
     opened page from reaching back through window.opener. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=pierre-livetrend/rag-system-implementation" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">Remix</a></p>
</body>
</html>