Update index.html
Browse files- index.html +43 -12
index.html
CHANGED
@@ -34,9 +34,9 @@
|
|
34 |
<img src="figures/ARvsFM.png" alt="AR vs FM" style="width: 100%; border-radius: 20px; box-shadow: 0 4px 16px rgba(0,0,0,0.2); margin-bottom: 20px;">
|
35 |
<h1>AR vs FM: A Comparative Study on Audio Modeling Paradigms</h1>
|
36 |
<p>
|
37 |
-
<a href="https://scholar.google.com/citations?user=
|
38 |
-
<a href="https://scholar.google.com/citations?user=
|
39 |
-
<a href="https://scholar.google.com/citations?user=
|
40 |
</p>
|
41 |
</div>
|
42 |
|
@@ -60,21 +60,52 @@
|
|
60 |
</div>
|
61 |
</div>
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
<script>
|
64 |
const highlights = [
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
];
|
72 |
-
|
73 |
let highlightIndex = 0;
|
74 |
-
const highlightText = document.getElementById('highlight-text');
|
75 |
|
76 |
function showHighlight(index) {
|
77 |
-
|
|
|
78 |
}
|
79 |
|
80 |
function prevHighlight() {
|
|
|
34 |
<img src="figures/ARvsFM.png" alt="AR vs FM" style="width: 100%; border-radius: 20px; box-shadow: 0 4px 16px rgba(0,0,0,0.2); margin-bottom: 20px;">
|
35 |
<h1>AR vs FM: A Comparative Study on Audio Modeling Paradigms</h1>
|
36 |
<p>
|
37 |
+
<a href="https://scholar.google.com/citations?user=QK3_J9IAAAAJ" target="_blank">Or Tal</a> ·
|
38 |
+
<a href="https://scholar.google.com/citations?user=UiERcYsAAAAJ" target="_blank">Felix Kreuk</a> ·
|
39 |
+
<a href="https://scholar.google.com/citations?user=ryMtc7sAAAAJ" target="_blank">Yossi Adi</a>
|
40 |
</p>
|
41 |
</div>
|
42 |
|
|
|
60 |
</div>
|
61 |
</div>
|
62 |
|
63 |
+
<!-- Interactive Highlight Slider -->
|
64 |
+
<div class="container">
|
65 |
+
<h2>Paper Highlights</h2>
|
66 |
+
<div id="highlight-box" style="text-align: center; padding: 30px; border: 1px solid #ddd; border-radius: 10px; background: #fafafa;">
|
67 |
+
<p id="highlight-text" style="font-size: 1.2rem; font-style: italic;"></p>
|
68 |
+
<img id="highlight-image" src="" alt="Highlight figure" style="max-width: 100%; max-height: 400px; margin-top: 20px; border-radius: 12px; box-shadow: 0 2px 12px rgba(0,0,0,0.1);">
|
69 |
+
</div>
|
70 |
+
<div class="text-center mt-3">
|
71 |
+
<button onclick="prevHighlight()" class="btn btn-outline-primary">← Prev</button>
|
72 |
+
<button onclick="nextHighlight()" class="btn btn-outline-primary">Next →</button>
|
73 |
+
</div>
|
74 |
+
</div>
|
75 |
+
|
76 |
<script>
|
77 |
const highlights = [
|
78 |
+
{
|
79 |
+
text: "🎼 AR vs FM across 5 axes — fidelity, control, editing, speed, and training. No single winner. Every strength is a trade-off.",
|
80 |
+
image: "figures/highlights/table.png"
|
81 |
+
},
|
82 |
+
{
|
83 |
+
text: "Both modeling paradigms (EnCodec-based latent) show comparable performance, with a slight edge for AR, which also proves to be more robust to the latent representation’s sample rate. FM performance degrades as the number of inference steps decreases. To maintain performance comparable to AR, FM requires a large number of inference steps.",
|
84 |
+
image: "figures/highlights/fidelity.png"
|
85 |
+
},
|
86 |
+
{
|
87 |
+
text: "AR follows temporally-aligned conditioning more accurately than FM, but both paradigms lose perceptual quality under strict controls, illustrating a controllability–fidelity trade-off.",
|
88 |
+
image: "figures/highlights/control.png"
|
89 |
+
},
|
90 |
+
{
|
91 |
+
text: "Supervised flow matching is the most robust inpainting method: it yields the smoothest and most coherent edits; zero-shot flow matching is attractive for rapid, prompt-driven edits but needs a small hyper-parameter search per-sample or a better sampling strategy to provide more stable outputs.",
|
92 |
+
image: "figures/highlights/inpainting.png"
|
93 |
+
},
|
94 |
+
{
|
95 |
+
text: "AR scales better with batch size thanks to KV caching; FM may become faster when the number of inference steps is reduced, but this comes at the cost of degraded generation quality. Selecting a modeling paradigm therefore hinges on how much quality one is willing to trade for latency.",
|
96 |
+
image: "figures/highlights/speed_vs_quality.png"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
text: "When the number of update steps is capped, FM reaches almost the same FAD, PQ, and CE as in the one-million-step topline using much smaller batches, though its CLAP score keeps improving with scale. The AR model needs a larger token budget per step to match its topline performance and benefits more from large-scale training.",
|
100 |
+
image: "figures/highlights/training_sensitivity.png"
|
101 |
+
}
|
102 |
];
|
103 |
+
|
104 |
let highlightIndex = 0;
|
|
|
105 |
|
106 |
function showHighlight(index) {
|
107 |
+
document.getElementById('highlight-text').textContent = highlights[index].text;
|
108 |
+
document.getElementById('highlight-image').src = highlights[index].image;
|
109 |
}
|
110 |
|
111 |
function prevHighlight() {
|