autoprogrammer committed on
Commit
49ad088
·
verified ·
1 Parent(s): f10caa3

Upload DeepSeekV2Lite DenseMixer model

Browse files

DeepSeekV2Lite model with DenseMixer architecture

Files changed (1) hide show
  1. modeling_deepseek.py +1 -1
modeling_deepseek.py CHANGED
@@ -666,7 +666,7 @@ class DeepseekV2MoE(nn.Module):
666
  matches = (topk_idx == expert_idx)
667
  if matches.any():
668
  token_indices, k_indices = torch.where(matches)
669
- weights_topk = topk_weight[token_indices, k_indices].unsqueeze(-1) # (num_matches, 1)
670
  sparse_outputs[token_indices] = sparse_outputs[token_indices] + expert_output[token_indices] * weights_topk
671
  else:
672
  # 推理模式:使用原始的稀疏计算逻辑
 
666
  matches = (topk_idx == expert_idx)
667
  if matches.any():
668
  token_indices, k_indices = torch.where(matches)
669
+ weights_topk = topk_weight[token_indices, k_indices].unsqueeze(-1).to(sparse_outputs.dtype) # (num_matches, 1)
670
  sparse_outputs[token_indices] = sparse_outputs[token_indices] + expert_output[token_indices] * weights_topk
671
  else:
672
  # 推理模式:使用原始的稀疏计算逻辑