{
  "model_name": "Echo-TDM",
  "architecture": "EchoLLM",
  "vocab_size": 32000,
  "max_position_embeddings": 8192,
  "d_model": 768,
  "num_layers": 12,
  "num_heads": 12,
  "dim_feedforward": 3072,
  "dropout": 0.1,
  "activation": "gelu",
  "num_experts": 9,
  "use_memory": true,
  "memory_size": 2048,
  "batch_size": 8,
  "learning_rate": 5e-05,
  "num_epochs": 10,
  "optimizer": "Adafactor",
  "scheduler": "cosine",
  "warmup_steps": 1000,
  "weight_decay": 0.01,
  "curriculum_learning": true,
  "loss_function": "CrossEntropyLoss",
  "eval_metric": "perplexity",
  "output_dir": "EchoTDM_Model\\model",
  "checkpoint_frequency": 1,
  "save_best_model": true,
  "load_from_checkpoint": null,
  "use_gradient_checkpointing": true,
  "use_mixed_precision": true,
  "gradient_accumulation_steps": 4,
  "export_formats": [
    "safetensors"
  ],
  "precision": "float16",
  "lora_rank": 4,
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "temp_memory_threshold": 100,
  "perm_memory_limit": 0.05,
  "use_gamification": true,
  "reward_points": 10,
  "user_profile_enabled": true,
  "max_token_length": 2048
}