# File size: 424 Bytes · 5f57783
# Mixture-of-experts merge configuration.
# NOTE(review): the key names match the mergekit-moe config schema — confirm
# that is the consuming tool.
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
gate_mode: random
dtype: float16
tokenizer_source: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
experts_per_token: 2

# NOTE(review): the experts below are different model sizes (7B and 1.5B);
# confirm the merge tool accepts experts whose size differs from base_model.
experts:
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B

shared_experts:
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
    # These settings are nested under the shared expert entry; left at
    # column 0 they would be parsed as separate top-level keys.
    positive_prompts:
      - ""
    residual_scale: 0.1