# Hyperparameters and config settings
EMBED_DIM = 256      # Size of token embeddings
NUM_HEADS = 8        # Number of attention heads
NUM_LAYERS = 4       # Number of transformer blocks
FF_DIM = 512         # Feedforward layer dimension
MAX_SEQ_LEN = 256    # Maximum sequence length
VOCAB_SIZE = 100     # Placeholder; overridden from the dataset's tokenizer at load time
ADAPTER_DIM = 32     # Bottleneck width of the adapter used for continual learning
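
# A minimal sketch of how these settings could be consumed, assuming PyTorch.
# The Adapter and TransformerBlock classes below are illustrative names, not
# part of the original code: a bottleneck adapter of width ADAPTER_DIM is
# inserted after each block's feedforward sublayer, the usual placement for
# continual-learning adapters.
import torch
import torch.nn as nn

class Adapter(nn.Module):
    """Bottleneck adapter: project down to adapter_dim, apply a nonlinearity,
    project back up, with a residual connection. Zero-initializing the up
    projection makes the adapter start as an identity function."""
    def __init__(self, embed_dim: int, adapter_dim: int):
        super().__init__()
        self.down = nn.Linear(embed_dim, adapter_dim)
        self.up = nn.Linear(adapter_dim, embed_dim)
        nn.init.zeros_(self.up.weight)
        nn.init.zeros_(self.up.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.up(torch.relu(self.down(x)))

class TransformerBlock(nn.Module):
    """One transformer block wired from the config values above."""
    def __init__(self, embed_dim: int, num_heads: int, ff_dim: int, adapter_dim: int):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        self.ff = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim),
        )
        self.adapter = Adapter(embed_dim, adapter_dim)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        attn_out, _ = self.attn(x, x, x)       # self-attention over the sequence
        x = self.norm1(x + attn_out)           # residual + norm
        x = self.norm2(x + self.adapter(self.ff(x)))  # feedforward -> adapter -> residual + norm
        return x

# NUM_LAYERS such blocks would then be stacked, e.g.:
# blocks = nn.ModuleList(
#     TransformerBlock(EMBED_DIM, NUM_HEADS, FF_DIM, ADAPTER_DIM)
#     for _ in range(NUM_LAYERS)
# )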