brian-remodl committed on
Commit
b042219
·
verified ·
1 Parent(s): 53da694

Upload Nova FP8 quantized model v1.1

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. config.json +2 -2
README.md CHANGED
@@ -45,7 +45,7 @@ This model is a Nova-quantized FP8 version of [meta-llama/Llama-3.2-1B-Instruct]
45
  | Metric | Value |
46
  |--------|-------|
47
  | Model Size Reduction | 50.0% |
48
- | Quantization Time | 0.12311434745788574 seconds |
49
  | Memory Usage | 3.44 GB |
50
 
51
  ## Validation Results
 
45
  | Metric | Value |
46
  |--------|-------|
47
  | Model Size Reduction | 50.0% |
48
+ | Quantization Time | 0.12972354888916016 seconds |
49
  | Memory Usage | 3.44 GB |
50
 
51
  ## Validation Results
config.json CHANGED
@@ -91,13 +91,13 @@
91
  "transformers_version": "4.53.0",
92
  "model_type": "llama",
93
  "nova_quant": true,
94
- "nova_quant_version": "1.0",
95
  "quantization_config": {
96
  "quant_method": "fp8",
97
  "fmt": "e4m3",
98
  "flashinfer_optimized": true,
99
  "quantized_by": "nova",
100
- "quantized_at": "2025-09-06T01:18:59.343846",
101
  "source_model": "meta-llama/Llama-3.2-1B-Instruct",
102
  "modules_to_not_convert": [
103
  "lm_head",
 
91
  "transformers_version": "4.53.0",
92
  "model_type": "llama",
93
  "nova_quant": true,
94
+ "nova_quant_version": "1.1",
95
  "quantization_config": {
96
  "quant_method": "fp8",
97
  "fmt": "e4m3",
98
  "flashinfer_optimized": true,
99
  "quantized_by": "nova",
100
+ "quantized_at": "2025-09-06T01:50:12.562042",
101
  "source_model": "meta-llama/Llama-3.2-1B-Instruct",
102
  "modules_to_not_convert": [
103
  "lm_head",