# Qwen3-MaCoTo / qwen3_moe_config.py
# Source commit: ad207aa ("Create qwen3_moe_config.py")
from transformers import PretrainedConfig
class Qwen3MoEConfig(PretrainedConfig):
    """Configuration class whose ``model_type`` is ``"qwen3moe"``.

    In addition to whatever ``PretrainedConfig`` does with ``**kwargs``,
    the constructor records five extra settings as instance attributes.

    Args:
        router_model_path: Stored unchanged; defaults to ``None``.
            NOTE(review): presumably where the router model lives --
            confirm against the code that consumes this config.
        expert_model_paths: Stored unchanged when truthy; any falsy value
            (``None``, an empty container, ...) is replaced by a new
            empty ``dict``.
        labels: Stored unchanged when truthy; any falsy value is replaced
            by a new empty ``list``.
        torch_dtype: Stored unchanged; defaults to the string ``"auto"``.
        tokenizer_path: Stored unchanged; defaults to ``None``.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "qwen3moe"

    def __init__(
        self,
        router_model_path=None,
        expert_model_paths=None,
        labels=None,
        torch_dtype="auto",
        tokenizer_path=None,
        **kwargs,
    ):
        # The base initializer runs first; the attributes below are then
        # assigned on top of whatever it set up.
        super().__init__(**kwargs)

        self.router_model_path = router_model_path
        # Falsy inputs fall back to a fresh container created per call,
        # so instances never share a default dict/list.
        self.expert_model_paths = (
            expert_model_paths if expert_model_paths else {}
        )
        self.labels = labels if labels else []
        self.torch_dtype = torch_dtype
        self.tokenizer_path = tokenizer_path