File size: 424 Bytes
5f57783
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
---
# Mixture-of-experts merge recipe.
# NOTE(review): the key names match the mergekit-moe config schema; confirm
# against the tool that actually consumes this file.

# Base checkpoint. tokenizer_source names the same repository as base_model.
base_model: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
tokenizer_source: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'

# Gate mode, number of experts used per token, and dtype.
# experts_per_token equals the number of entries under `experts` below (2).
gate_mode: 'random'
experts_per_token: 2
dtype: 'float16'

# Routed experts.
# NOTE(review): the two entries are different model sizes (7B and 1.5B);
# verify that the merge tool accepts an expert whose size differs from
# base_model before relying on this recipe.
experts:
  - source_model: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
  - source_model: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B'

# Shared expert, pointing at the same repository as base_model.
# NOTE(review): the only prompt is an empty string while gate_mode is
# 'random'; confirm whether positive_prompts is needed (or permitted) here.
shared_experts:
  - source_model: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
    positive_prompts:
      - ''
    residual_scale: 0.1