Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
@@ -148,11 +148,11 @@ Benchmarking is one of the most important procedures during model acceleration.
|
|
148 |
|
149 |
| Metric/Model | S | M | L | XL | Original | W8A8, int8 |
|
150 |
|---------------|---|---|---|----|----------|------------|
|
151 |
-
| arc_challenge | 65.30 | 66.30 | 66.70 | 66.80 | 66.80 |
|
152 |
-
| gsm8k | 87.70 | 88.40 | 87.70 | - | - | 87.
|
153 |
-
| mmlu | 79.00 | 79.40 | 79.70 | 80.20 | 80.20 |
|
154 |
-
| piqa | 82.90 | 83.10 | 82.60 | 83.00 | 83.00 |
|
155 |
-
| winogrande | 78.20 | 79.40 | 79.30 | 79.50 | 79.50 |
|
156 |
|
157 |
|
158 |
|
@@ -169,7 +169,7 @@ __100 input/300 output; tok/s:__
|
|
169 |
| GPU/Model | S | M | L | XL | Original | W8A8, int8 |
|
170 |
|-----------|-----|---|---|----|----------|------------|
|
171 |
| H100 | 90 | 82 | 72 | 54 | 41 | 95 | - |
|
172 |
-
| L40S | 25 |
|
173 |
|
174 |
|
175 |
### Performance by Context Size
|
@@ -211,22 +211,22 @@ The tables below show performance (tokens per second) for different input contex
|
|
211 |
|---------|-------------|---|---|---|----|---------|
|
212 |
| Small | 256 | 26.0 | 24.0 | 21.0 | - | - | - |
|
213 |
| Medium | 1024 | 25.8 | 23.8 | 20.9 | - | - | - |
|
214 |
-
| Large | 4096 | 25.
|
215 |
|
216 |
*Batch Size 8:*
|
217 |
|
218 |
| Context | Input Tokens | S | M | L | XL | Original |
|
219 |
|---------|-------------|---|---|---|----|---------|
|
220 |
-
| Small | 256 | 25.
|
221 |
-
| Medium | 1024 | 24.3 | 22.4 | 19.
|
222 |
| Large | 4096 | - | - | - | - | - | - |
|
223 |
|
224 |
*Batch Size 16:*
|
225 |
|
226 |
| Context | Input Tokens | S | M | L | XL | Original |
|
227 |
|---------|-------------|---|---|---|----|---------|
|
228 |
-
| Small | 256 | 24.
|
229 |
-
| Medium | 1024 | 22.8 |
|
230 |
| Large | 4096 | - | - | - | - | - | - |
|
231 |
|
232 |
|
|
|
148 |
|
149 |
| Metric/Model | S | M | L | XL | Original | W8A8, int8 |
|
150 |
|---------------|---|---|---|----|----------|------------|
|
151 |
+
| arc_challenge | 65.30 | 66.30 | 66.70 | 66.80 | 66.80 | 64.20 |
|
152 |
+
| gsm8k | 87.70 | 88.40 | 87.70 | - | - | 87.60 |
|
153 |
+
| mmlu | 79.00 | 79.40 | 79.70 | 80.20 | 80.20 | 78.90 |
|
154 |
+
| piqa | 82.90 | 83.10 | 82.60 | 83.00 | 83.00 | 81.80 |
|
155 |
+
| winogrande | 78.20 | 79.40 | 79.30 | 79.50 | 79.50 | 76.00 |
|
156 |
|
157 |
|
158 |
|
|
|
169 |
| GPU/Model | S | M | L | XL | Original | W8A8, int8 |
|
170 |
|-----------|-----|---|---|----|----------|------------|
|
171 |
| H100 | 90 | 82 | 72 | 54 | 41 | 95 |
|
172 |
+
| L40S | 25 | 23 | 20 | - | - | 27 |
|
173 |
|
174 |
|
175 |
### Performance by Context Size
|
|
|
211 |
|---------|-------------|---|---|---|----|---------|
|
212 |
| Small | 256 | 26.0 | 24.0 | 21.0 | - | - |
|
213 |
| Medium | 1024 | 25.8 | 23.8 | 20.9 | - | - |
|
214 |
+
| Large | 4096 | 25.1 | 23.3 | 20.5 | - | - |
|
215 |
|
216 |
*Batch Size 8:*
|
217 |
|
218 |
| Context | Input Tokens | S | M | L | XL | Original |
|
219 |
|---------|-------------|---|---|---|----|---------|
|
220 |
+
| Small | 256 | 25.2 | 23.2 | 20.4 | - | - |
|
221 |
+
| Medium | 1024 | 24.3 | 22.4 | 19.8 | - | - |
|
222 |
| Large | 4096 | - | - | - | - | - |
|
223 |
|
224 |
*Batch Size 16:*
|
225 |
|
226 |
| Context | Input Tokens | S | M | L | XL | Original |
|
227 |
|---------|-------------|---|---|---|----|---------|
|
228 |
+
| Small | 256 | 24.5 | 22.6 | 19.9 | - | - |
|
229 |
+
| Medium | 1024 | 22.8 | 20.9 | - | - | - |
|
230 |
| Large | 4096 | - | - | - | - | - |
|
231 |
|
232 |
|