Add 3 files
Browse files
- README.md +7 -5
- index.html +323 -19
- prompts.txt +0 -0
README.md
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: static
|
7 |
pinned: false
|
|
|
|
|
8 |
---
|
9 |
|
10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: jepa
|
3 |
+
emoji: 🐳
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: pink
|
6 |
sdk: static
|
7 |
pinned: false
|
8 |
+
tags:
|
9 |
+
- deepsite
|
10 |
---
|
11 |
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
index.html
CHANGED
@@ -1,19 +1,323 @@
|
|
1 |
-
<!
|
2 |
-
<html>
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>JEPA and Cognitive Architectures</title>
|
7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
8 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
9 |
+
<style>
|
10 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
|
11 |
+
|
12 |
+
body {
|
13 |
+
font-family: 'Inter', sans-serif;
|
14 |
+
background-color: #f9fafb;
|
15 |
+
color: #111827;
|
16 |
+
}
|
17 |
+
|
18 |
+
.gradient-header {
|
19 |
+
background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
|
20 |
+
}
|
21 |
+
|
22 |
+
.diagram-container {
|
23 |
+
background-color: #f3f4f6;
|
24 |
+
border-radius: 0.5rem;
|
25 |
+
padding: 1.5rem;
|
26 |
+
margin: 1.5rem 0;
|
27 |
+
border-left: 4px solid #4f46e5;
|
28 |
+
}
|
29 |
+
|
30 |
+
.concept-card {
|
31 |
+
transition: all 0.3s ease;
|
32 |
+
border-radius: 0.5rem;
|
33 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
|
34 |
+
}
|
35 |
+
|
36 |
+
.concept-card:hover {
|
37 |
+
transform: translateY(-2px);
|
38 |
+
box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1);
|
39 |
+
}
|
40 |
+
|
41 |
+
.section-divider {
|
42 |
+
border-top: 2px dashed #d1d5db;
|
43 |
+
margin: 2rem 0;
|
44 |
+
}
|
45 |
+
|
46 |
+
.key-point {
|
47 |
+
background-color: #eef2ff;
|
48 |
+
border-left: 4px solid #4f46e5;
|
49 |
+
padding: 1rem;
|
50 |
+
margin: 1rem 0;
|
51 |
+
border-radius: 0 0.375rem 0.375rem 0;
|
52 |
+
}
|
53 |
+
|
54 |
+
code {
|
55 |
+
background-color: #f3f4f6;
|
56 |
+
padding: 0.2rem 0.4rem;
|
57 |
+
border-radius: 0.25rem;
|
58 |
+
font-family: 'Courier New', monospace;
|
59 |
+
font-size: 0.9em;
|
60 |
+
color: #7c3aed;
|
61 |
+
}
|
62 |
+
|
63 |
+
.pseudo-code {
|
64 |
+
background-color: #1e293b;
|
65 |
+
color: #f8fafc;
|
66 |
+
padding: 1rem;
|
67 |
+
border-radius: 0.5rem;
|
68 |
+
font-family: 'Courier New', monospace;
|
69 |
+
overflow-x: auto;
|
70 |
+
margin: 1.5rem 0;
|
71 |
+
}
|
72 |
+
|
73 |
+
.pseudo-code .keyword {
|
74 |
+
color: #f472b6;
|
75 |
+
}
|
76 |
+
|
77 |
+
.pseudo-code .comment {
|
78 |
+
color: #94a3b8;
|
79 |
+
font-style: italic;
|
80 |
+
}
|
81 |
+
|
82 |
+
.pseudo-code .string {
|
83 |
+
color: #86efac;
|
84 |
+
}
|
85 |
+
|
86 |
+
.pseudo-code .function {
|
87 |
+
color: #60a5fa;
|
88 |
+
}
|
89 |
+
</style>
|
90 |
+
</head>
|
91 |
+
<body class="bg-gray-50">
|
92 |
+
<div class="max-w-5xl mx-auto px-4 py-8">
|
93 |
+
<!-- Header -->
|
94 |
+
<header class="gradient-header text-white rounded-xl p-8 mb-8 shadow-lg">
|
95 |
+
<div class="flex items-center justify-between">
|
96 |
+
<div>
|
97 |
+
<h1 class="text-4xl font-bold mb-2">JEPA and Cognitive Architectures</h1>
|
98 |
+
<p class="text-xl opacity-90">A Comprehensive Introduction to Predictive AI Systems</p>
|
99 |
+
</div>
|
100 |
+
<div class="bg-white/20 p-4 rounded-lg">
|
101 |
+
<i class="fas fa-brain text-4xl"></i>
|
102 |
+
</div>
|
103 |
+
</div>
|
104 |
+
</header>
|
105 |
+
|
106 |
+
<!-- Navigation -->
|
107 |
+
<nav class="bg-white rounded-lg shadow-sm p-4 mb-8 sticky top-4 z-10">
|
108 |
+
<ul class="flex flex-wrap gap-4 justify-center">
|
109 |
+
<li><a href="#motivation" class="text-indigo-600 hover:text-indigo-800 font-medium">Motivation</a></li>
|
110 |
+
<li><a href="#jepa-core" class="text-indigo-600 hover:text-indigo-800 font-medium">JEPA Core</a></li>
|
111 |
+
<li><a href="#cognitive-arch" class="text-indigo-600 hover:text-indigo-800 font-medium">Cognitive Architecture</a></li>
|
112 |
+
<li><a href="#modules" class="text-indigo-600 hover:text-indigo-800 font-medium">Modules</a></li>
|
113 |
+
<li><a href="#examples" class="text-indigo-600 hover:text-indigo-800 font-medium">Examples</a></li>
|
114 |
+
<li><a href="#conclusion" class="text-indigo-600 hover:text-indigo-800 font-medium">Conclusion</a></li>
|
115 |
+
</ul>
|
116 |
+
</nav>
|
117 |
+
|
118 |
+
<!-- Main Content -->
|
119 |
+
<main class="space-y-8">
|
120 |
+
<!-- Motivation Section -->
|
121 |
+
<section id="motivation" class="bg-white rounded-xl shadow-sm p-6">
|
122 |
+
<h2 class="text-2xl font-bold mb-4 text-gray-800 flex items-center">
|
123 |
+
<i class="fas fa-lightbulb text-yellow-500 mr-3"></i>
|
124 |
+
<span>1. Motivation and Background</span>
|
125 |
+
</h2>
|
126 |
+
|
127 |
+
<h3 class="text-xl font-semibold mt-6 mb-3 text-gray-700">1.1 The Need for Predictive Representations</h3>
|
128 |
+
<p class="text-gray-700 mb-4">
|
129 |
+
Modern AI systems must <span class="font-medium">perceive</span>, <span class="font-medium">reason</span>, and <span class="font-medium">act</span> in complex, dynamic environments. Human intelligence excels not because we memorize every detail, but because we <span class="font-medium">summarize</span>, <span class="font-medium">predict</span>, and <span class="font-medium">plan</span> using abstract representations—ignoring irrelevant noise and focusing on what is useful for future reasoning or action.
|
130 |
+
</p>
|
131 |
+
<p class="text-gray-700 mb-4">
|
132 |
+
Recent advances in deep learning (e.g., large language models, vision transformers) have shown the power of self-supervised representation learning. However, standard architectures (like autoregressive models) are often forced to model all details, including noise and unpredictability, limiting robustness and sample efficiency.
|
133 |
+
</p>
|
134 |
+
|
135 |
+
<h3 class="text-xl font-semibold mt-6 mb-3 text-gray-700">1.2 Enter JEPA: Joint Embedding Predictive Architecture</h3>
|
136 |
+
<p class="text-gray-700">
|
137 |
+
Proposed by Yann LeCun and colleagues, <span class="font-medium text-indigo-700">JEPA</span> offers a novel approach:
|
138 |
+
</p>
|
139 |
+
<ul class="list-disc pl-6 mt-2 space-y-2 text-gray-700">
|
140 |
+
<li><span class="font-medium">Learn representations by predicting only what is predictable</span>—not every detail, but the essential structure that allows for accurate reasoning and planning.</li>
|
141 |
+
</ul>
|
142 |
+
|
143 |
+
<div class="key-point mt-6">
|
144 |
+
<p class="font-medium text-gray-800">Key Insight:</p>
|
145 |
+
<p>JEPA focuses on learning the predictable aspects of data while ignoring unpredictable noise, leading to more robust and efficient representations.</p>
|
146 |
+
</div>
|
147 |
+
</section>
|
148 |
+
|
149 |
+
<!-- JEPA Core Section -->
|
150 |
+
<section id="jepa-core" class="bg-white rounded-xl shadow-sm p-6">
|
151 |
+
<h2 class="text-2xl font-bold mb-4 text-gray-800 flex items-center">
|
152 |
+
<i class="fas fa-puzzle-piece text-blue-500 mr-3"></i>
|
153 |
+
<span>2. JEPA: Core Ideas and Mechanism</span>
|
154 |
+
</h2>
|
155 |
+
|
156 |
+
<h3 class="text-xl font-semibold mt-6 mb-3 text-gray-700">2.1 What is JEPA?</h3>
|
157 |
+
<p class="text-gray-700 mb-4">
|
158 |
+
<span class="font-medium text-indigo-700">JEPA (Joint Embedding Predictive Architecture)</span> is a self-supervised learning framework where a model is trained to embed contexts (observed parts) and targets (future or missing parts) into a shared semantic space.
|
159 |
+
</p>
|
160 |
+
|
161 |
+
<div class="bg-blue-50 p-4 rounded-lg mb-6">
|
162 |
+
<p class="font-medium text-blue-800">Objective:</p>
|
163 |
+
<ul class="list-disc pl-6 mt-2 space-y-1 text-blue-800">
|
164 |
+
<li>If the context and target belong together (e.g., two halves of the same image, or a sentence and its continuation), their embeddings should be <span class="font-medium">close</span>.</li>
|
165 |
+
<li>If they do not (random combinations), their embeddings should be <span class="font-medium">far apart</span>.</li>
|
166 |
+
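<li>This is typically implemented via a <span class="font-medium">contrastive loss</span> in joint-embedding methods in general; JEPA as formulated by LeCun instead favors non-contrastive training, in which a predictor maps the context embedding to the target embedding.</li>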
|
167 |
+
</ul>
|
168 |
+
</div>
|
169 |
+
|
170 |
+
<h3 class="text-xl font-semibold mt-6 mb-3 text-gray-700">2.2 Why Is This Powerful?</h3>
|
171 |
+
<div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
|
172 |
+
<div class="concept-card bg-white p-4 border border-gray-200">
|
173 |
+
<div class="text-purple-600 mb-2">
|
174 |
+
<i class="fas fa-filter text-xl"></i>
|
175 |
+
</div>
|
176 |
+
<h4 class="font-semibold mb-2">Focuses on Structure</h4>
|
177 |
+
<p class="text-sm text-gray-600">Encodes only predictable, meaningful features while ignoring noise</p>
|
178 |
+
</div>
|
179 |
+
<div class="concept-card bg-white p-4 border border-gray-200">
|
180 |
+
<div class="text-green-600 mb-2">
|
181 |
+
<i class="fas fa-shapes text-xl"></i>
|
182 |
+
</div>
|
183 |
+
<h4 class="font-semibold mb-2">Multi-Modal</h4>
|
184 |
+
<p class="text-sm text-gray-600">Works for vision, language, audio, video, and more</p>
|
185 |
+
</div>
|
186 |
+
<div class="concept-card bg-white p-4 border border-gray-200">
|
187 |
+
<div class="text-red-600 mb-2">
|
188 |
+
<i class="fas fa-robot text-xl"></i>
|
189 |
+
</div>
|
190 |
+
<h4 class="font-semibold mb-2">Transferable Features</h4>
|
191 |
+
<p class="text-sm text-gray-600">Learns representations useful for reasoning and planning</p>
|
192 |
+
</div>
|
193 |
+
</div>
|
194 |
+
|
195 |
+
<h3 class="text-xl font-semibold mt-6 mb-3 text-gray-700">2.3 The JEPA Training Loop</h3>
|
196 |
+
<div class="diagram-container">
|
197 |
+
<div class="flex flex-col items-center">
|
198 |
+
<div class="flex items-center justify-center space-x-8 mb-6">
|
199 |
+
<div class="text-center">
|
200 |
+
<div class="bg-indigo-100 p-3 rounded-lg inline-block">
|
201 |
+
<i class="fas fa-eye text-indigo-600 text-2xl"></i>
|
202 |
+
</div>
|
203 |
+
<p class="mt-2 font-medium">Context Encoder</p>
|
204 |
+
<p class="text-sm text-gray-600">Takes observed input</p>
|
205 |
+
</div>
|
206 |
+
<div class="text-center">
|
207 |
+
<div class="bg-indigo-100 p-3 rounded-lg inline-block">
|
208 |
+
<i class="fas fa-project-diagram text-indigo-600 text-2xl"></i>
|
209 |
+
</div>
|
210 |
+
<p class="mt-2 font-medium">Embedding Space</p>
|
211 |
+
<p class="text-sm text-gray-600">Shared representation</p>
|
212 |
+
</div>
|
213 |
+
<div class="text-center">
|
214 |
+
<div class="bg-indigo-100 p-3 rounded-lg inline-block">
|
215 |
+
<i class="fas fa-bullseye text-indigo-600 text-2xl"></i>
|
216 |
+
</div>
|
217 |
+
<p class="mt-2 font-medium">Target Encoder</p>
|
218 |
+
<p class="text-sm text-gray-600">Takes future/missing part</p>
|
219 |
+
</div>
|
220 |
+
</div>
|
221 |
+
<div class="w-full bg-indigo-50 p-4 rounded-lg">
|
222 |
+
<div class="flex justify-between items-center px-4">
|
223 |
+
<div class="text-center">
|
224 |
+
<p class="font-medium">Input Context</p>
|
225 |
+
<p class="text-sm">(e.g., left image half)</p>
|
226 |
+
</div>
|
227 |
+
<div class="text-center">
|
228 |
+
<p class="font-medium">Similarity</p>
|
229 |
+
<p class="text-sm">Contrastive Loss</p>
|
230 |
+
</div>
|
231 |
+
<div class="text-center">
|
232 |
+
<p class="font-medium">Input Target</p>
|
233 |
+
<p class="text-sm">(e.g., right image half)</p>
|
234 |
+
</div>
|
235 |
+
</div>
|
236 |
+
</div>
|
237 |
+
</div>
|
238 |
+
</div>
|
239 |
+
|
240 |
+
<h4 class="font-semibold mt-6 mb-2 text-gray-700">Concrete Examples:</h4>
|
241 |
+
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
|
242 |
+
<div class="bg-gray-50 p-4 rounded-lg border border-gray-200">
|
243 |
+
<div class="flex items-center mb-2">
|
244 |
+
<div class="bg-purple-100 p-2 rounded-full mr-3">
|
245 |
+
<i class="fas fa-image text-purple-600"></i>
|
246 |
+
</div>
|
247 |
+
<h5 class="font-medium">Vision Example</h5>
|
248 |
+
</div>
|
249 |
+
<ul class="list-disc pl-6 text-sm text-gray-700">
|
250 |
+
<li>Context: Left half of a cat image</li>
|
251 |
+
<li>Target: Right half</li>
|
252 |
+
<li>Embeddings should be close if they come from the same photo, far otherwise</li>
|
253 |
+
</ul>
|
254 |
+
</div>
|
255 |
+
<div class="bg-gray-50 p-4 rounded-lg border border-gray-200">
|
256 |
+
<div class="flex items-center mb-2">
|
257 |
+
<div class="bg-green-100 p-2 rounded-full mr-3">
|
258 |
+
<i class="fas fa-language text-green-600"></i>
|
259 |
+
</div>
|
260 |
+
<h5 class="font-medium">Language Example</h5>
|
261 |
+
</div>
|
262 |
+
<ul class="list-disc pl-6 text-sm text-gray-700">
|
263 |
+
<li>Context: "The cat sat on the"</li>
|
264 |
+
<li>Target: "mat"</li>
|
265 |
+
<li>Close if the sequence is real, far if target is random</li>
|
266 |
+
</ul>
|
267 |
+
</div>
|
268 |
+
</div>
|
269 |
+
</section>
|
270 |
+
|
271 |
+
<!-- Cognitive Architecture Section -->
|
272 |
+
<section id="cognitive-arch" class="bg-white rounded-xl shadow-sm p-6">
|
273 |
+
<h2 class="text-2xl font-bold mb-4 text-gray-800 flex items-center">
|
274 |
+
<i class="fas fa-sitemap text-teal-500 mr-3"></i>
|
275 |
+
<span>3. From Representation to Reasoning: JEPA in Cognitive Architectures</span>
|
276 |
+
</h2>
|
277 |
+
|
278 |
+
<p class="text-gray-700 mb-4">
|
279 |
+
JEPA shines as a <span class="font-medium">perception module</span> within a larger, <span class="font-medium">modular cognitive agent</span>. This mirrors biological systems: sensory organs and cortex encode perceptions, while higher reasoning and planning are handled by specialized systems.
|
280 |
+
</p>
|
281 |
+
|
282 |
+
<h3 class="text-xl font-semibold mt-6 mb-3 text-gray-700">3.1 The Modular Agent</h3>
|
283 |
+
<p class="text-gray-700 mb-4">
|
284 |
+
The LeCun-style architecture for an intelligent agent typically includes:
|
285 |
+
</p>
|
286 |
+
|
287 |
+
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4 mb-6">
|
288 |
+
<div class="concept-card bg-indigo-50 p-4">
|
289 |
+
<div class="flex items-center mb-2">
|
290 |
+
<div class="bg-indigo-100 p-2 rounded-full mr-3">
|
291 |
+
<i class="fas fa-eye text-indigo-600"></i>
|
292 |
+
</div>
|
293 |
+
<h4 class="font-medium">1. Perception Module (JEPA)</h4>
|
294 |
+
</div>
|
295 |
+
<p class="text-sm text-gray-700">Encodes current observation into a compact, predictive embedding</p>
|
296 |
+
</div>
|
297 |
+
<div class="concept-card bg-blue-50 p-4">
|
298 |
+
<div class="flex items-center mb-2">
|
299 |
+
<div class="bg-blue-100 p-2 rounded-full mr-3">
|
300 |
+
<i class="fas fa-memory text-blue-600"></i>
|
301 |
+
</div>
|
302 |
+
<h4 class="font-medium">2. Short-term Memory</h4>
|
303 |
+
</div>
|
304 |
+
<p class="text-sm text-gray-700">Stores recent sequence of embeddings (history)</p>
|
305 |
+
</div>
|
306 |
+
<div class="concept-card bg-purple-50 p-4">
|
307 |
+
<div class="flex items-center mb-2">
|
308 |
+
<div class="bg-purple-100 p-2 rounded-full mr-3">
|
309 |
+
<i class="fas fa-globe text-purple-600"></i>
|
310 |
+
</div>
|
311 |
+
<h4 class="font-medium">3. World Model</h4>
|
312 |
+
</div>
|
313 |
+
<p class="text-sm text-gray-700">Integrates the sequence to produce a latent state</p>
|
314 |
+
</div>
|
315 |
+
<div class="concept-card bg-green-50 p-4">
|
316 |
+
<div class="flex items-center mb-2">
|
317 |
+
<div class="bg-green-100 p-2 rounded-full mr-3">
|
318 |
+
<i class="fas fa-cogs text-green-600"></i>
|
319 |
+
</div>
|
320 |
+
<h4 class="font-medium">4. Configurator</h4>
|
321 |
+
</div>
|
322 |
+
|
323 |
+
</html>
|
prompts.txt
ADDED
File without changes
|