# dsmoe-1bx7b / mergekit_moe_config.yml
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B  # supplies attention, embeddings, and all non-expert weights
gate_mode: random                                    # initialize the router gates with random weights (no prompt-based gate computation)
dtype: float16
tokenizer_source: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
experts_per_token: 2                                 # top-k routing: each token is dispatched to 2 experts
experts:                                             # routed experts: each source model contributes its MLP weights as one expert
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
shared_experts:                                      # always-active expert applied to every token alongside the routed experts
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
    positive_prompts:                                # unused when gate_mode is random
      - ""
    residual_scale: 0.1                              # down-weights the shared expert's output relative to the routed experts
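
# Usage sketch (not part of the original file; assumes mergekit is installed,
# e.g. `pip install mergekit`). The mergekit-moe entry point reads a config
# like this one and writes the composed MoE checkpoint to an output directory;
# the output path below is illustrative:
#
#   mergekit-moe mergekit_moe_config.yml ./dsmoe-1bx7b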