{
  "action_dim": 32,
  "action_head_cfg": {
    "action_dim": 32,
    "action_horizon": 16,
    "add_pos_embed": true,
    "backbone_embedding_dim": 2048,
    "diffusion_model_cfg": {
      "attention_head_dim": 48,
      "cross_attention_dim": 2048,
      "dropout": 0.2,
      "final_dropout": true,
      "interleave_self_attention": true,
      "norm_type": "ada_norm",
      "num_attention_heads": 32,
      "num_layers": 16,
      "output_dim": 1024,
      "positional_embeddings": null
    },
    "hidden_size": 1024,
    "input_embedding_dim": 1536,
    "max_action_dim": 32,
    "max_state_dim": 64,
    "model_dtype": "float32",
    "noise_beta_alpha": 1.5,
    "noise_beta_beta": 1.0,
    "noise_s": 0.999,
    "num_inference_timesteps": 4,
    "num_target_vision_tokens": 32,
    "num_timestep_buckets": 1000,
    "tune_diffusion_model": true,
    "tune_projector": true,
    "use_vlln": true,
    "vl_self_attention_cfg": {
      "attention_head_dim": 64,
      "dropout": 0.2,
      "final_dropout": true,
      "num_attention_heads": 32,
      "num_layers": 4,
      "positional_embeddings": null
    }
  },
  "action_horizon": 16,
  "architectures": [
    "GR00T_N1_5"
  ],
  "backbone_cfg": {
    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
    "load_bf16": false,
    "project_to_dim": null,
    "reproject_vision": false,
    "select_layer": 12,
    "tune_llm": false,
    "tune_visual": true,
    "use_flash_attention": true
  },
  "hidden_size": 2048,
  "model_dtype": "float32",
  "model_type": "gr00t_n1_5",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.3"
}