Upload Nova FP8 quantized model v1.1
- README.md +1 -1
- config.json +2 -2
README.md
CHANGED
@@ -45,7 +45,7 @@ This model is a Nova-quantized FP8 version of [meta-llama/Llama-3.2-1B-Instruct]
 | Metric | Value |
 |--------|-------|
 | Model Size Reduction | 50.0% |
-| Quantization Time | 0.
+| Quantization Time | 0.12972354888916016 seconds |
 | Memory Usage | 3.44 GB |
 
 ## Validation Results
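The 50% figure in the table is what FP8 weight storage predicts: each quantized weight takes one byte instead of the two bytes used by FP16/BF16. A back-of-the-envelope sketch (the ~1.24B parameter count for Llama-3.2-1B is an assumption, not part of this diff):

```python
# Rough weight-size estimate; the parameter count below is assumed, not taken from the diff.
NUM_PARAMS = 1.24e9                    # approx. Llama-3.2-1B parameter count (assumption)

bf16_bytes = NUM_PARAMS * 2            # 16-bit weights: 2 bytes per parameter
fp8_bytes = NUM_PARAMS * 1             # FP8 (e4m3) weights: 1 byte per parameter

reduction = 1 - fp8_bytes / bf16_bytes
print(f"Expected weight-size reduction: {reduction:.1%}")  # 50.0%, consistent with the README table
```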
config.json
CHANGED
@@ -91,13 +91,13 @@
   "transformers_version": "4.53.0",
   "model_type": "llama",
   "nova_quant": true,
-  "nova_quant_version": "1.
+  "nova_quant_version": "1.1",
   "quantization_config": {
     "quant_method": "fp8",
     "fmt": "e4m3",
     "flashinfer_optimized": true,
     "quantized_by": "nova",
-    "quantized_at": "2025-09-06T01:
+    "quantized_at": "2025-09-06T01:50:12.562042",
     "source_model": "meta-llama/Llama-3.2-1B-Instruct",
     "modules_to_not_convert": [
       "lm_head",
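The updated metadata can be sanity-checked without downloading the weights. A minimal sketch using huggingface_hub; the repo id below is a placeholder, since the commit does not show where the model is published:

```python
import json

from huggingface_hub import hf_hub_download

# Placeholder repo id for illustration; substitute the actual Nova FP8 upload.
REPO_ID = "nova/Llama-3.2-1B-Instruct-FP8"

# Fetch only config.json from the Hub and parse it.
config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
with open(config_path) as f:
    config = json.load(f)

quant = config["quantization_config"]

# Fields written by this commit (expected values taken from the diff above).
assert config["nova_quant"] is True
assert config["nova_quant_version"] == "1.1"
assert quant["quant_method"] == "fp8" and quant["fmt"] == "e4m3"
assert "lm_head" in quant["modules_to_not_convert"]

print("source model:", quant["source_model"])
print("quantized at:", quant["quantized_at"])
```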