amd
/

DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid

ONNX

Model card · Files and versions · Community

satreysa committed 7 days ago

Commit

72a7845

verified ·

1 Parent(s): c0c02ce

Update genai_config.json

Browse files

Files changed (1) hide show

genai_config.json +54 -54

genai_config.json CHANGED Viewed

@@ -1,55 +1,55 @@
-{
-    "model": {
-        "bos_token_id": 128000,
-        "context_length": 131072,
-        "decoder": {
-            "session_options": {
-                "log_id": "onnxruntime-genai",
-                "custom_ops_library": "<path_to>\\onnx_custom_ops.dll",
-                "external_data_file": "DeepSeek-R1-Distill-Llama-8B.pb.bin",
-                "custom_allocator": "shared_d3d_xrt",
-                "hybrid_opt_free_after_prefill": "0",
-                "hybrid_opt_gpu_jit": "0",
-                "provider_options": []
-            },
-            "filename": "DeepSeek-R1-Distill-Llama-8B.onnx",
-            "head_size": 128,
-            "hidden_size": 4096,
-            "inputs": {
-                "input_ids": "input_ids",
-                "attention_mask": "attention_mask",
-                "position_ids": "position_ids",
-                "past_key_names": "past_key_values.%d.key",
-                "past_value_names": "past_key_values.%d.value"
-            },
-            "outputs": {
-                "logits": "logits",
-                "present_key_names": "present.%d.key",
-                "present_value_names": "present.%d.value"
-            },
-            "num_attention_heads": 32,
-            "num_hidden_layers": 32,
-            "num_key_value_heads": 8
-        },
-        "eos_token_id": 128001,
-        "pad_token_id": 128001,
-        "type": "llama",
-        "vocab_size": 128256
-    },
-    "search": {
-        "diversity_penalty": 0.0,
-        "do_sample": true,
-        "early_stopping": true,
-        "length_penalty": 1.0,
-        "max_length": 131072,
-        "min_length": 0,
-        "no_repeat_ngram_size": 0,
-        "num_beams": 1,
-        "num_return_sequences": 1,
-        "past_present_share_buffer": true,
-        "repetition_penalty": 1.0,
-        "temperature": 0.6,
-        "top_k": 1,
-        "top_p": 0.95
-    }
 }

+{
+    "model": {
+        "bos_token_id": 128000,
+        "context_length": 131072,
+        "decoder": {
+            "session_options": {
+                "log_id": "onnxruntime-genai",
+                "custom_ops_library": "onnx_custom_ops.dll",
+                "external_data_file": "DeepSeek-R1-Distill-Llama-8B.pb.bin",
+                "custom_allocator": "shared_d3d_xrt",
+                "hybrid_opt_free_after_prefill": "0",
+                "hybrid_opt_gpu_jit": "0",
+                "provider_options": []
+            },
+            "filename": "DeepSeek-R1-Distill-Llama-8B.onnx",
+            "head_size": 128,
+            "hidden_size": 4096,
+            "inputs": {
+                "input_ids": "input_ids",
+                "attention_mask": "attention_mask",
+                "position_ids": "position_ids",
+                "past_key_names": "past_key_values.%d.key",
+                "past_value_names": "past_key_values.%d.value"
+            },
+            "outputs": {
+                "logits": "logits",
+                "present_key_names": "present.%d.key",
+                "present_value_names": "present.%d.value"
+            },
+            "num_attention_heads": 32,
+            "num_hidden_layers": 32,
+            "num_key_value_heads": 8
+        },
+        "eos_token_id": 128001,
+        "pad_token_id": 128001,
+        "type": "llama",
+        "vocab_size": 128256
+    },
+    "search": {
+        "diversity_penalty": 0.0,
+        "do_sample": true,
+        "early_stopping": true,
+        "length_penalty": 1.0,
+        "max_length": 131072,
+        "min_length": 0,
+        "no_repeat_ngram_size": 0,
+        "num_beams": 1,
+        "num_return_sequences": 1,
+        "past_present_share_buffer": true,
+        "repetition_penalty": 1.0,
+        "temperature": 0.6,
+        "top_k": 1,
+        "top_p": 0.95
+    }
 }