|
{
  "architectures": [
    "HrwkvHybridForCausalLM"
  ],
  "description": "Hybrid-RWKV Strategically Interleaved RWKV-Attention",
  "base_model": "RekaAI/reka-flash-3.1",
  "model_revision": "",
  "transformer_layers": [4, 8, 11, 15, 18, 22, 25, 29, 32, 36, 39, 43],
  "rwkv_layers": [0, 1, 2, 3, 5, 6, 7, 9, 10, 12, 13, 14, 16, 17, 19, 20, 21, 23, 24, 26, 27, 28, 30, 31, 33, 34, 35, 37, 38, 40, 41, 42],
  "rwkv_architecture": "hxa079",
  "enable_qk_norm": false,
  "nope_in_transformer": true,
  "nope_in_rwkv": false,
  "lora_rank_decay": 320,
  "lora_rank_iclr": 128,
  "lora_rank_value_residual_mix": 96,
  "lora_rank_key_residual_mix": 96,
  "lora_rank_gate": 384,
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 100257,
  "classifier_dropout": 0.0,
  "eos_token_id": 100257,
  "head_dim": 96,
  "hidden_act": "silu",
  "hidden_size": 6144,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.006,
  "intermediate_size": 19648,
  "label2id": {
    "LABEL_0": 0
  },
  "max_position_embeddings": 98304,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 44,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 8000000,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.50.3",
  "use_cache": true,
  "vocab_size": 100352
}
|
|