Update moe_idefics2.py
Browse files
Removed some commented-out code
- moe_idefics2.py +0 -8
moe_idefics2.py
CHANGED
@@ -52,14 +52,6 @@ class MoE(nn.Module):
|
|
52 |
expert_outputs = []
|
53 |
for expert in self.experts:
|
54 |
expert_outputs.append(expert.down_proj(expert.act_fn(expert.gate_proj(x)) * expert.up_proj(x)))
|
55 |
-
'''
|
56 |
-
|
57 |
-
up_states = expert.gate_up_proj(x.view(-1, x.size(-1))) # Flatten to [batch_size * seq_length, input_dim]
|
58 |
-
gate, up_states = up_states.chunk(2, dim=-1)
|
59 |
-
up_states = up_states * expert.activation_fn(gate)
|
60 |
-
expert_output = expert.down_proj(up_states)
|
61 |
-
expert_outputs.append(expert_output.view(batch_size, seq_length, -1))
|
62 |
-
'''
|
63 |
|
64 |
expert_outputs = torch.stack(expert_outputs, dim=-1) # Shape: [batch_size, seq_length, hidden_size, num_experts]
|
65 |
|
|
|
52 |
expert_outputs = []
|
53 |
for expert in self.experts:
|
54 |
expert_outputs.append(expert.down_proj(expert.act_fn(expert.gate_proj(x)) * expert.up_proj(x)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
expert_outputs = torch.stack(expert_outputs, dim=-1) # Shape: [batch_size, seq_length, hidden_size, num_experts]
|
57 |
|