|
--- |
|
language: |
|
- en |
|
license: mit |
|
library_name: transformers |
|
pipeline_tag: robotics |
|
tags: |
|
- gr00t |
|
- robotics |
|
- nvidia |
|
- embodied-ai |
|
- trained-model |
|
private: true |
|
--- |
|
|
|
# gr00t-wholettheducksout |
|
|
|
This is a GR00T (Generalist Robot 00 Technology) N1.5 model, fine-tuned from `nvidia/GR00T-N1.5-3B` using NVIDIA's GR00T training framework. |
|
|
|
## Model Details |
|
|
|
- **Model Type**: GR00T Embodied AI Model |
|
- **Training Job**: wholettheducksout_1to1_matched |
|
- **Training Steps**: 200,000 |
|
- **Training Duration**: ~23.5 hours |
|
- **Data Configuration**: so100_dualcam |
|
- **Base Model**: nvidia/GR00T-N1.5-3B |
|
|
|
## Training Configuration |
|
|
|
```json |
|
{ |
|
"action_dim": 32, |
|
"action_head_cfg": { |
|
"action_dim": 32, |
|
"action_horizon": 16, |
|
"add_pos_embed": true, |
|
"backbone_embedding_dim": 2048, |
|
"diffusion_model_cfg": { |
|
"attention_head_dim": 48, |
|
"cross_attention_dim": 2048, |
|
"dropout": 0.2, |
|
"final_dropout": true, |
|
"interleave_self_attention": true, |
|
"norm_type": "ada_norm", |
|
"num_attention_heads": 32, |
|
"num_layers": 16, |
|
"output_dim": 1024, |
|
"positional_embeddings": null |
|
}, |
|
"hidden_size": 1024, |
|
"input_embedding_dim": 1536, |
|
"max_action_dim": 32, |
|
"max_state_dim": 64, |
|
"model_dtype": "float32", |
|
"noise_beta_alpha": 1.5, |
|
"noise_beta_beta": 1.0, |
|
"noise_s": 0.999, |
|
"num_inference_timesteps": 4, |
|
"num_target_vision_tokens": 32, |
|
"num_timestep_buckets": 1000, |
|
"tune_diffusion_model": true, |
|
"tune_projector": true, |
|
"use_vlln": true, |
|
"vl_self_attention_cfg": { |
|
"attention_head_dim": 64, |
|
"dropout": 0.2, |
|
"final_dropout": true, |
|
"num_attention_heads": 32, |
|
"num_layers": 4, |
|
"positional_embeddings": null |
|
} |
|
}, |
|
"action_horizon": 16, |
|
"architectures": [ |
|
"GR00T_N1_5" |
|
], |
|
"attn_implementation": null, |
|
"backbone_cfg": { |
|
"eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops", |
|
"load_bf16": false, |
|
"project_to_dim": null, |
|
"reproject_vision": false, |
|
"select_layer": 12, |
|
"tune_llm": false, |
|
"tune_visual": true, |
|
"use_flash_attention": true |
|
}, |
|
"compute_dtype": "bfloat16", |
|
"hidden_size": 2048, |
|
"model_dtype": "float32", |
|
"model_type": "gr00t_n1_5", |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.51.3" |
|
} |
|
``` |
|
|
|
## Usage |
|
|
|
This model can be used with the GR00T inference framework: |
|
|
|
```python |
|
# Example usage (adjust based on your specific setup) |
|
from gr00t_inference import GR00TInference |
|
|
|
model = GR00TInference( |
|
model_path="path/to/this/model", |
|
embodiment_tag="new_embodiment", |
|
data_config="so100_dualcam" |
|
) |
|
|
|
# Use for inference |
|
results = model.infer(your_input_data) |
|
``` |
|
|
|
## Training Metadata |
|
|
|
{ |
|
"new_embodiment": { |
|
"statistics": { |
|
"state": { |
|
"single_arm": { |
|
"max": [ |
|
72.46653747558594, |
|
62.818336486816406, |
|
99.72752380371094, |
|
99.39103698730469, |
|
-46.26399230957031 |
|
], |
|
"min": [ |
|
-86.99808502197266, |
|
-99.32088470458984, |
|
-97.72933959960938, |
|
-87.64680480957031, |
|
-65.0611801147461 |
|
], |
|
"mean": [ |
|
-7.457055568695068, |
|
-25.479028701782227, |
|
32.967071533203125, |
|
35.0267333984375, |
|
-55.26940155029297 |
|
], |
|
"std": [ |
|
20.533525466918945, |
|
50.98550033569336, |
|
50.28582763671875, |
|
45.0773811340332, |
|
2.7385220527648926 |
|
], |
|
"q01": [ |
|
-75.78075408935547, |
|
-99.1511001586914, |
|
-95.18619537353516, |
|
-62.41844177246094, |
|
-61.2080192565918 |
|
], |
|
"q99": [ |
|
33.20586395263672, |
|
55.67232688903806, |
|
99.54586791992188, |
|
99.30404663085938, |
|
-48.86748123168945 |
|
] |
|
}, |
|
"gripper": { |
|
"max": [ |
|
49.49358367919922 |
|
], |
|
"min": [ |
|
1.3504388332366943 |
|
], |
|
"mean": [ |
|
11.123491287231445 |
|
], |
|
"std": [ |
|
10.017578125 |
|
], |
|
"q01": [ |
|
1.3504388332366943 |
|
], |
|
"q99": [ |
|
40.64821243286133 |
|
] |
|
} |
|
}, |
|
"action": { |
|
"single_arm": { |
|
"max": [ |
|
73.06226348876953, |
|
62.077701568603516, |
|
99.81908416748047, |
|
100.0, |
|
-46.0078010559082 |
|
], |
|
"min": [ |
|
-87.29351806640625, |
|
-100.0, |
|
-99.81908416748047, |
|
-91.41742706298828, |
|
-65.25357818603516 |
|
], |
|
"mean": [ |
|
-7.188200950622559, |
|
-26.144899368286133, |
|
31.129091262817383, |
|
34.6439094543457, |
|
-55.28120803833008 |
|
], |
|
"std": [ |
|
20.539134979248047, |
|
50.40521240234375, |
|
50.696495056152344, |
|
45.221248626708984, |
|
2.745452642440796 |
|
], |
|
"q01": [ |
|
-75.47649383544922, |
|
-99.49324035644531, |
|
-96.72727142333984, |
|
-62.808841705322266, |
|
-61.508453369140625 |
|
], |
|
"q99": [ |
|
33.67217254638672, |
|
54.47635269165039, |
|
99.63817596435547, |
|
99.56653594970703, |
|
-48.920677185058594 |
|
] |
|
}, |
|
"gripper": { |
|
"max": [ |
|
49.88161087036133 |
|
], |
|
"min": [ |
|
0.23677979409694672 |
|
], |
|
"mean": [ |
|
9.19546890258789 |
|
], |
|
"std": [ |
|
10.420595169067383 |
|
], |
|
"q01": [ |
|
1.262825608253479 |
|
], |
|
"q99": [ |
|
40.64719772338867 |
|
] |
|
} |
|
} |
|
}, |
|
"modalities": { |
|
"video": { |
|
"front": { |
|
"resolution": [ |
|
640, |
|
480 |
|
], |
|
"channels": 3, |
|
"fps": 30.0 |
|
}, |
|
"wrist": { |
|
"resolution": [ |
|
640, |
|
480 |
|
], |
|
"channels": 3, |
|
"fps": 30.0 |
|
} |
|
}, |
|
"state": { |
|
"single_arm": { |
|
"absolute": true, |
|
"rotation_type": null, |
|
"shape": [ |
|
5 |
|
], |
|
"continuous": true |
|
}, |
|
"gripper": { |
|
"absolute": true, |
|
"rotation_type": null, |
|
"shape": [ |
|
1 |
|
], |
|
"continuous": true |
|
} |
|
}, |
|
"action": { |
|
"single_arm": { |
|
"absolute": true, |
|
"rotation_type": null, |
|
"shape": [ |
|
5 |
|
], |
|
"continuous": true |
|
}, |
|
"gripper": { |
|
"absolute": true, |
|
"rotation_type": null, |
|
"shape": [ |
|
1 |
|
], |
|
"continuous": true |
|
} |
|
} |
|
}, |
|
"embodiment_tag": "new_embodiment" |
|
} |
|
} |
|
|
|
## Files |
|
|
|
- `config.json`: Model configuration |
|
- `model-*.safetensors`: Model weights in SafeTensors format |
|
- `model.safetensors.index.json`: Model sharding index |
|
- `experiment_cfg/metadata.json`: Training experiment metadata |
|
|
|
## License |
|
|
|
This model is released under the MIT license. |
|
|