Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,280 @@
|
|
1 |
-
---
|
2 |
-
license: mit
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
- fr
|
6 |
+
- de
|
7 |
+
- es
|
8 |
+
- pt
|
9 |
+
- it
|
10 |
+
- ja
|
11 |
+
- ko
|
12 |
+
- ru
|
13 |
+
- zh
|
14 |
+
- ar
|
15 |
+
- fa
|
16 |
+
- id
|
17 |
+
- ms
|
18 |
+
- ns
|
19 |
+
- pl
|
20 |
+
- ro
|
21 |
+
- sr
|
22 |
+
- sv
|
23 |
+
- tr
|
24 |
+
- uk
|
25 |
+
- vi
|
26 |
+
- hi
|
27 |
+
- bn
|
28 |
+
library_name: transformers
|
29 |
+
inference: false
|
30 |
+
---
|
31 |
+
|
32 |
+
<style>
|
33 |
+
:root{
|
34 |
+
--bg: #0b0c0f;
|
35 |
+
--panel: #0f1117;
|
36 |
+
--ink: #e9eefc;
|
37 |
+
--muted: #9aa3b2;
|
38 |
+
--brand: #433aac; /* purple */
|
39 |
+
--brand-2: #6b4fe8; /* lighter purple accent */
|
40 |
+
--border: rgba(255,255,255,.08);
|
41 |
+
--glow: rgba(67,58,172,.25);
|
42 |
+
--radius: 16px;
|
43 |
+
}
|
44 |
+
*{ box-sizing: border-box }
|
45 |
+
body{ margin: 0; padding: 28px; background: var(--bg); color: var(--muted); font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; }
|
46 |
+
.card{
|
47 |
+
background: linear-gradient(180deg,rgba(255,255,255,.02),rgba(255,255,255,.00));
|
48 |
+
border:1px solid var(--border);
|
49 |
+
border-radius: var(--radius);
|
50 |
+
padding:16px;
|
51 |
+
}
|
52 |
+
.badge{
|
53 |
+
display:inline-flex;align-items:center;gap:.5rem;
|
54 |
+
padding:.35rem .6rem;border:1px solid var(--border);border-radius:999px;
|
55 |
+
color:var(--muted);font-size:.85rem
|
56 |
+
}
|
57 |
+
.grid{ display:grid; gap:18px }
|
58 |
+
.grid-2{ grid-template-columns:repeat(2,minmax(0,1fr)); }
|
59 |
+
.grid-3{ grid-template-columns:repeat(3,minmax(0,1fr)); }
|
60 |
+
@media(max-width:900px){ .grid-2,.grid-3{ grid-template-columns:1fr } }
|
61 |
+
.kicker{
|
62 |
+
display:inline-block;letter-spacing:.12em;text-transform:uppercase;
|
63 |
+
color:var(--muted);font-size:.75rem;margin-bottom:.5rem
|
64 |
+
}
|
65 |
+
h1,h2,h3{ color:var(--ink); margin:0 0 .4rem 0; line-height:1.1 }
|
66 |
+
h1{ font-size:2.25rem; font-weight:800 }
|
67 |
+
h2{ font-size:1.3rem; font-weight:700 }
|
68 |
+
h3{ font-size:1.05rem; font-weight:700 }
|
69 |
+
p,li{ color:var(--muted); line-height:1.6 }
|
70 |
+
hr{ border:none; height:1px; background:var(--border); margin:28px 0 }
|
71 |
+
a.btn{
|
72 |
+
display:inline-block; padding:.7rem 1rem; border-radius:12px;
|
73 |
+
background: linear-gradient(180deg,var(--brand),#3a2d98);
|
74 |
+
color:var(--ink); text-decoration:none; font-weight:600;
|
75 |
+
box-shadow: 0 10px 30px var(--glow);
|
76 |
+
}
|
77 |
+
a.btn.ghost{
|
78 |
+
background:transparent; color:var(--ink); border:1px solid var(--border)
|
79 |
+
}
|
80 |
+
kbd{
|
81 |
+
background:#0c1322;color:#cfe0ff;border:1px solid #1a2742;border-bottom-color:#142138;
|
82 |
+
padding:.12rem .4rem;border-radius:6px;font-size:.85rem
|
83 |
+
}
|
84 |
+
.codeblock{
|
85 |
+
background:#0b1220;border:1px solid #15233d;border-radius:12px;padding: 8px;overflow:auto;
|
86 |
+
margin: 1rem 0;
|
87 |
+
}
|
88 |
+
.codeblock pre {
|
89 |
+
margin: 0;
|
90 |
+
color: var(--ink);
|
91 |
+
}
|
92 |
+
.tagline{
|
93 |
+
font-size:1.05rem;color:#c6d5ff
|
94 |
+
}
|
95 |
+
.pill{
|
96 |
+
display:inline-flex;align-items:center;gap:.4rem;
|
97 |
+
padding:.35rem .6rem;border-radius:999px;border:1px dashed var(--border);color:#b9c5db
|
98 |
+
}
|
99 |
+
.hero{
|
100 |
+
background:
|
101 |
+
radial-gradient(600px 240px at 20% 0%,rgba(67,58,172,.18),transparent 60%),
|
102 |
+
radial-gradient(600px 240px at 80% 10%,rgba(107,79,232,.12),transparent 60%);
|
103 |
+
border:1px solid var(--border);
|
104 |
+
border-radius:20px; padding:28px
|
105 |
+
}
|
106 |
+
details{
|
107 |
+
border:1px solid var(--border);border-radius:12px;padding:14px;background:rgba(255,255,255,.02)
|
108 |
+
}
|
109 |
+
summary{ cursor:pointer;color:var(--ink);font-weight:700 }
|
110 |
+
blockquote{
|
111 |
+
margin:0;padding:14px;border-left:3px solid var(--brand);background:rgba(67,58,172,.06);
|
112 |
+
border-radius:0 10px 10px 0;color:#8f7ce8
|
113 |
+
}
|
114 |
+
table{ width:100%; border-collapse:collapse; margin: 1rem 0; }
|
115 |
+
th,td{ text-align:left; padding:10px; border-bottom:1px solid var(--border); color:var(--muted); font-size: .9rem; }
|
116 |
+
th{ color:var(--brand-2); font-weight: 700; }
|
117 |
+
.callout{
|
118 |
+
border:1px solid var(--border);border-radius:14px;padding:14px;background:rgba(255,255,255,.02)
|
119 |
+
}
|
120 |
+
.metadata{
|
121 |
+
background: #0a0b0e; border: 1px solid var(--border); border-radius: 12px;
|
122 |
+
padding: 16px; margin-bottom: 24px; font-family: 'Monaco', 'Menlo', monospace;
|
123 |
+
font-size: .85rem; color: #8a91a3;
|
124 |
+
}
|
125 |
+
</style>
|
126 |
+
|
127 |
+
<div class="hero">
|
128 |
+
<div class="kicker">Open Models</div>
|
129 |
+
<h1>Aqui-open1 Model Family</h1>
|
130 |
+
<p class="tagline">We hereby present the first series of small open models by Aqui, trained from scratch and with an MIT license. The open1 family delivers state-of-the-art performance across reasoning, mathematics, and coding tasks while maintaining efficient inference capabilities.</p>
|
131 |
+
|
132 |
+
<div style="margin-top: 20px; display: flex; gap: 12px; flex-wrap: wrap;">
|
133 |
+
<div class="pill">📅 Released September 7, 2025</div>
|
134 |
+
<div class="pill">📜 MIT Licensed</div>
|
135 |
+
<div class="pill">⚡ Efficient Architecture</div>
|
136 |
+
</div>
|
137 |
+
</div>
|
138 |
+
|
139 |
+
<div class="grid grid-2" style="margin-top: 28px;">
|
140 |
+
<div class="card">
|
141 |
+
<h2>open1-1.5B-Instruct</h2>
|
142 |
+
<p>Ultra-efficient model optimized for edge deployment and real-time applications.</p>
|
143 |
+
<div style="margin: 16px 0;">
|
144 |
+
<div class="badge">🧠 1.5B parameters</div>
|
145 |
+
<div class="badge">📏 128K context</div>
|
146 |
+
<div class="badge">⚡ Fast inference</div>
|
147 |
+
</div>
|
148 |
+
<a href="https://huggingface.co/aquigpt/open1-1.5B-Instruct" class="btn">View Model</a>
|
149 |
+
</div>
|
150 |
+
|
151 |
+
<div class="card">
|
152 |
+
<h2>open1-7.5B-Instruct</h2>
|
153 |
+
<p>Balanced model providing exceptional performance across diverse tasks with reasonable compute requirements.</p>
|
154 |
+
<div style="margin: 16px 0;">
|
155 |
+
<div class="badge">🧠 7.5B parameters</div>
|
156 |
+
<div class="badge">📏 32K context</div>
|
157 |
+
<div class="badge">🎯 High accuracy</div>
|
158 |
+
</div>
|
159 |
+
<a href="https://huggingface.co/aquigpt/open1-7.5B-Instruct" class="btn">View Model</a>
|
160 |
+
</div>
|
161 |
+
</div>
|
162 |
+
|
163 |
+
<div class="callout" style="margin: 28px 0;">
|
164 |
+
<h3>🚀 Coming This Week</h3>
|
165 |
+
<p><strong>Aqui-open1-30B-A7B</strong> — Our flagship model featuring advanced reasoning capabilities and multimodal understanding. Stay tuned for the most capable open model in the series.</p>
|
166 |
+
</div>
|
167 |
+
|
168 |
+
<hr>
|
169 |
+
|
170 |
+
<h2>Benchmark Performance</h2>
|
171 |
+
|
172 |
+
<h3>1.5B Model Comparison</h3>
|
173 |
+
<table>
|
174 |
+
<thead>
|
175 |
+
<tr>
|
176 |
+
<th>Metric</th>
|
177 |
+
<th>open1-1.5B-Instruct</th>
|
178 |
+
<th>Llama-3.2-1B-Instruct</th>
|
179 |
+
<th>LFM2-1.2B</th>
|
180 |
+
<th>Qwen3-1.7B</th>
|
181 |
+
<th>Gemma-3-1B-it</th>
|
182 |
+
<th>SmolLM2-1.7B-Instruct</th>
|
183 |
+
</tr>
|
184 |
+
</thead>
|
185 |
+
<tbody>
|
186 |
+
<tr><td>MMLU</td><td><strong>58.5</strong></td><td>46.6</td><td>55.2</td><td>59.1</td><td>40.1</td><td>42.3</td></tr>
|
187 |
+
<tr><td>GPQA</td><td><strong>32.3</strong></td><td>28.8</td><td>31.5</td><td>27.7</td><td>21.1</td><td>22.1</td></tr>
|
188 |
+
<tr><td>GSM8K</td><td><strong>62.6</strong></td><td>35.7</td><td>58.3</td><td>51.4</td><td>59.6</td><td>48.2</td></tr>
|
189 |
+
<tr><td>IFEval</td><td>72.7</td><td>52.4</td><td><strong>74.9</strong></td><td>74.0</td><td>62.9</td><td>56.7</td></tr>
|
190 |
+
<tr><td>MGSM</td><td>59.1</td><td>29.1</td><td>55.0</td><td><strong>66.6</strong></td><td>43.6</td><td>38.5</td></tr>
|
191 |
+
<tr style="border-top: 2px solid var(--brand);"><td><strong>Average</strong></td><td><strong>57.0</strong></td><td>38.5</td><td>55.0</td><td>55.8</td><td>45.5</td><td>41.6</td></tr>
|
192 |
+
</tbody>
|
193 |
+
</table>
|
194 |
+
|
195 |
+
<h3>7.5B Model Comparison</h3>
|
196 |
+
<table>
|
197 |
+
<thead>
|
198 |
+
<tr>
|
199 |
+
<th>Benchmark</th>
|
200 |
+
<th>open1-7.5B-Instruct</th>
|
201 |
+
<th>Llama-3.1-8B-Instruct</th>
|
202 |
+
<th>LFM-7B</th>
|
203 |
+
<th>Qwen3-8B</th>
|
204 |
+
<th>Gemma-3-12B-it</th>
|
205 |
+
<th>Nemotron-Nano-9B-v2</th>
|
206 |
+
</tr>
|
207 |
+
</thead>
|
208 |
+
<tbody>
|
209 |
+
<tr><td>MMLU</td><td><strong>75.8</strong></td><td>68.7</td><td>69.4</td><td>71.6</td><td>72.5</td><td>74.5</td></tr>
|
210 |
+
<tr><td>HumanEval</td><td>82.3</td><td>71.7</td><td>70.1</td><td>84.8</td><td>84.8</td><td><strong>86.2</strong></td></tr>
|
211 |
+
<tr><td>GPQA Diamond</td><td><strong>52.2</strong></td><td>25.9</td><td>32.9</td><td>45.2</td><td>34.9</td><td>40.8</td></tr>
|
212 |
+
<tr><td>IFEval</td><td>78.9</td><td>77.0</td><td>71.6</td><td>83.4</td><td>81.5</td><td><strong>84.3</strong></td></tr>
|
213 |
+
<tr><td>AIME 2025</td><td>18.9</td><td>4.3</td><td>2.1</td><td><strong>20.2</strong></td><td>18.3</td><td>20.1</td></tr>
|
214 |
+
<tr style="border-top: 2px solid var(--brand);"><td><strong>Average</strong></td><td><strong>61.6</strong></td><td>49.5</td><td>49.2</td><td>61.0</td><td>58.4</td><td>61.2</td></tr>
|
215 |
+
</tbody>
|
216 |
+
</table>
|
217 |
+
|
218 |
+
<hr>
|
219 |
+
|
220 |
+
<h2>Key Features</h2>
|
221 |
+
|
222 |
+
<div class="grid grid-2">
|
223 |
+
<div class="card">
|
224 |
+
<h3>🎯 Superior Reasoning</h3>
|
225 |
+
<p>Exceptional performance on MMLU, GPQA, and mathematical reasoning tasks, outperforming models of similar and larger sizes.</p>
|
226 |
+
</div>
|
227 |
+
|
228 |
+
<div class="card">
|
229 |
+
<h3>⚡ Optimized Architecture</h3>
|
230 |
+
<p>Efficient transformer design enabling fast inference while maintaining high accuracy across diverse benchmarks.</p>
|
231 |
+
</div>
|
232 |
+
|
233 |
+
<div class="card">
|
234 |
+
<h3>🌍 Multilingual Support</h3>
|
235 |
+
<p>Trained on 20+ languages with robust performance across linguistic boundaries and cultural contexts.</p>
|
236 |
+
</div>
|
237 |
+
|
238 |
+
<div class="card">
|
239 |
+
<h3>📜 MIT Licensed</h3>
|
240 |
+
<p>Complete freedom for commercial use, modification, and redistribution with minimal restrictions.</p>
|
241 |
+
</div>
|
242 |
+
</div>
|
243 |
+
|
244 |
+
<hr>
|
245 |
+
|
246 |
+
<h2>Usage</h2>
|
247 |
+
|
248 |
+
<div class="codeblock">
|
249 |
+
<pre>
|
250 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
251 |
+
|
252 |
+
# Load the model and tokenizer
|
253 |
+
tokenizer = AutoTokenizer.from_pretrained("aquigpt/open1-1.5B-Instruct")
|
254 |
+
model = AutoModelForCausalLM.from_pretrained("aquigpt/open1-1.5B-Instruct")
|
255 |
+
|
256 |
+
# Generate text
|
257 |
+
inputs = tokenizer("Explain quantum computing:", return_tensors="pt")
|
258 |
+
outputs = model.generate(**inputs, max_length=200, temperature=0.7)
|
259 |
+
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
260 |
+
</pre>
|
261 |
+
</div>
|
262 |
+
|
263 |
+
<details>
|
264 |
+
<summary>Training Details</summary>
|
265 |
+
<p>The open1 models were trained from scratch on a diverse, high-quality dataset spanning code, mathematics, reasoning, and multilingual text. Training utilized advanced techniques including:</p>
|
266 |
+
<ul>
|
267 |
+
<li>Supervised fine-tuning on instruction-following data</li>
|
268 |
+
<li>Constitutional AI for alignment and safety</li>
|
269 |
+
<li>Advanced attention mechanisms for extended context</li>
|
270 |
+
<li>Multi-stage training with curriculum learning</li>
|
271 |
+
</ul>
|
272 |
+
</details>
|
273 |
+
|
274 |
+
<blockquote>
|
275 |
+
<strong>Note:</strong> These models are designed for research and commercial applications. While they demonstrate strong performance, users should conduct appropriate testing for their specific use cases.
|
276 |
+
</blockquote>
|
277 |
+
|
278 |
+
<div style="text-align: center; margin-top: 40px; color: var(--muted);">
|
279 |
+
<p>Built with ❤️ by the Aqui team • MIT • September 2025</p>
|
280 |
+
</div>
|