{
  "_name_or_path": "/vol/tmp/xlstm-wikipedia-2/",
  "add_out_norm": true,
  "architectures": [
    "xLSTMForCausalLM"
  ],
  "autocast_kernel_dtype": "bfloat16",
  "bos_token_id": 50256,
  "chunk_size": 64,
  "chunkwise_kernel": "chunkwise--native_autograd",
  "embedding_dim": 1024,
  "eos_token_id": 50256,
  "eps": 1e-06,
  "ffn_proj_factor": 2.667,
  "ffn_round_up_to_multiple_of": 64,
  "force_bos_token_insert": true,
  "gate_soft_cap": 15.0,
  "inference_state_dtype": "float32",
  "max_inference_chunksize": 16384,
  "mode": "inference",
  "model_type": "xlstm",
  "norm_eps": 1e-06,
  "norm_reduction_force_float32": true,
  "num_blocks": 24,
  "num_heads": 4,
  "output_logit_soft_cap": 30.0,
  "pad_token_id": 50256,
  "qk_dim_factor": 0.5,
  "return_last_states": true,
  "sequence_kernel": "native_sequence__native",
  "step_kernel": "native",
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.49.0.dev0",
  "use_bias": false,
  "use_cache": true,
  "v_dim_factor": 1.0,
  "vocab_size": 50257,
  "weight_mode": "single"
}