psynote123 committed on
Commit
3d42388
·
verified ·
1 Parent(s): b6ee866

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +11 -11
README.md CHANGED
@@ -148,11 +148,11 @@ Benchmarking is one of the most important procedures during model acceleration.
148
 
149
  | Metric/Model | S | M | L | XL | Original | W8A8, int8 |
150
  |---------------|---|---|---|----|----------|------------|
151
- | arc_challenge | 65.30 | 66.30 | 66.70 | 66.80 | 66.80 | 65.30 | - |
152
- | gsm8k | 87.70 | 88.40 | 87.70 | - | - | 87.70 | - |
153
- | mmlu | 79.00 | 79.40 | 79.70 | 80.20 | 80.20 | 79.00 | - |
154
- | piqa | 82.90 | 83.10 | 82.60 | 83.00 | 83.00 | 82.90 | - |
155
- | winogrande | 78.20 | 79.40 | 79.30 | 79.50 | 79.50 | 78.20 | - |
156
 
157
 
158
 
@@ -169,7 +169,7 @@ __100 input/300 output; tok/s:__
169
  | GPU/Model | S | M | L | XL | Original | W8A8, int8 |
170
  |-----------|-----|---|---|----|----------|------------|
171
  | H100 | 90 | 82 | 72 | 54 | 41 | 95 | - |
172
- | L40S | 25 | 24 | 20 | -1 | -1 | 27 | - |
173
 
174
 
175
  ### Performance by Context Size
@@ -211,22 +211,22 @@ The tables below show performance (tokens per second) for different input contex
211
  |---------|-------------|---|---|---|----|---------|
212
  | Small | 256 | 26.0 | 24.0 | 21.0 | - | - | - |
213
  | Medium | 1024 | 25.8 | 23.8 | 20.9 | - | - | - |
214
- | Large | 4096 | 25.2 | 23.3 | 20.5 | - | - | - |
215
 
216
  *Batch Size 8:*
217
 
218
  | Context | Input Tokens | S | M | L | XL | Original |
219
  |---------|-------------|---|---|---|----|---------|
220
- | Small | 256 | 25.3 | 23.4 | 20.5 | - | - | - |
221
- | Medium | 1024 | 24.3 | 22.4 | 19.7 | - | - | - |
222
  | Large | 4096 | - | - | - | - | - | - |
223
 
224
  *Batch Size 16:*
225
 
226
  | Context | Input Tokens | S | M | L | XL | Original |
227
  |---------|-------------|---|---|---|----|---------|
228
- | Small | 256 | 24.9 | 22.9 | 20.2 | - | - | - |
229
- | Medium | 1024 | 22.8 | 21.1 | - | - | - | - |
230
  | Large | 4096 | - | - | - | - | - | - |
231
 
232
 
 
148
 
149
  | Metric/Model | S | M | L | XL | Original | W8A8, int8 |
150
  |---------------|---|---|---|----|----------|------------|
151
+ | arc_challenge | 65.30 | 66.30 | 66.70 | 66.80 | 66.80 | 64.20 | - |
152
+ | gsm8k | 87.70 | 88.40 | 87.70 | - | - | 87.60 | - |
153
+ | mmlu | 79.00 | 79.40 | 79.70 | 80.20 | 80.20 | 78.90 | - |
154
+ | piqa | 82.90 | 83.10 | 82.60 | 83.00 | 83.00 | 81.80 | - |
155
+ | winogrande | 78.20 | 79.40 | 79.30 | 79.50 | 79.50 | 76.00 | - |
156
 
157
 
158
 
 
169
  | GPU/Model | S | M | L | XL | Original | W8A8, int8 |
170
  |-----------|-----|---|---|----|----------|------------|
171
  | H100 | 90 | 82 | 72 | 54 | 41 | 95 | - |
172
+ | L40S | 25 | 23 | 20 | -1 | -1 | 27 | - |
173
 
174
 
175
  ### Performance by Context Size
 
211
  |---------|-------------|---|---|---|----|---------|
212
  | Small | 256 | 26.0 | 24.0 | 21.0 | - | - | - |
213
  | Medium | 1024 | 25.8 | 23.8 | 20.9 | - | - | - |
214
+ | Large | 4096 | 25.1 | 23.3 | 20.5 | - | - | - |
215
 
216
  *Batch Size 8:*
217
 
218
  | Context | Input Tokens | S | M | L | XL | Original |
219
  |---------|-------------|---|---|---|----|---------|
220
+ | Small | 256 | 25.2 | 23.2 | 20.4 | - | - | - |
221
+ | Medium | 1024 | 24.3 | 22.4 | 19.8 | - | - | - |
222
  | Large | 4096 | - | - | - | - | - | - |
223
 
224
  *Batch Size 16:*
225
 
226
  | Context | Input Tokens | S | M | L | XL | Original |
227
  |---------|-------------|---|---|---|----|---------|
228
+ | Small | 256 | 24.5 | 22.6 | 19.9 | - | - | - |
229
+ | Medium | 1024 | 22.8 | 20.9 | - | - | - | - |
230
  | Large | 4096 | - | - | - | - | - | - |
231
 
232