Upload DeepSeekV2Lite DenseMixer model
Browse files
DeepSeekV2Lite model with DenseMixer architecture
- modeling_deepseek.py +1 -1
modeling_deepseek.py
CHANGED
@@ -666,7 +666,7 @@ class DeepseekV2MoE(nn.Module):
|
|
666 |
matches = (topk_idx == expert_idx)
|
667 |
if matches.any():
|
668 |
token_indices, k_indices = torch.where(matches)
|
669 |
-
weights_topk = topk_weight[token_indices, k_indices].unsqueeze(-1) # (num_matches, 1)
|
670 |
sparse_outputs[token_indices] = sparse_outputs[token_indices] + expert_output[token_indices] * weights_topk
|
671 |
else:
|
672 |
# 推理模式:使用原始的稀疏计算逻辑
|
|
|
666 |
matches = (topk_idx == expert_idx)
|
667 |
if matches.any():
|
668 |
token_indices, k_indices = torch.where(matches)
|
669 |
+
weights_topk = topk_weight[token_indices, k_indices].unsqueeze(-1).to(sparse_outputs.dtype) # (num_matches, 1)
|
670 |
sparse_outputs[token_indices] = sparse_outputs[token_indices] + expert_output[token_indices] * weights_topk
|
671 |
else:
|
672 |
# 推理模式:使用原始的稀疏计算逻辑
|