cagataydev committed on
Commit
562e80a
·
verified ·
1 Parent(s): 1349d0d

Upload GR00T trained model

Browse files
README.md ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: mit
5
+ library_name: transformers
6
+ pipeline_tag: robotics
7
+ tags:
8
+ - gr00t
9
+ - robotics
10
+ - nvidia
11
+ - embodied-ai
12
+ - trained-model
13
+ private: true
14
+ ---
15
+
16
+ # gr00t-wholettheducksout-1to1-matched
17
+
18
+ This is a GR00T (Generalist Robot 00 Technology) model trained using NVIDIA's GR00T training framework.
19
+
20
+ ## Model Details
21
+
22
+ - **Model Type**: GR00T Embodied AI Model
23
+ - **Training Job**: wholettheducksout_1to1_matched
24
+ - **Training Steps**: 200,000
25
+ - **Training Duration**: ~23.5 hours
26
+ - **Data Configuration**: so100_dualcam
27
+ - **Base Model**: nvidia/GR00T-N1.5-3B
28
+
29
+ ## Training Configuration
30
+
31
+ ```json
32
+ {
33
+ "action_dim": 32,
34
+ "action_head_cfg": {
35
+ "action_dim": 32,
36
+ "action_horizon": 16,
37
+ "add_pos_embed": true,
38
+ "backbone_embedding_dim": 2048,
39
+ "diffusion_model_cfg": {
40
+ "attention_head_dim": 48,
41
+ "cross_attention_dim": 2048,
42
+ "dropout": 0.2,
43
+ "final_dropout": true,
44
+ "interleave_self_attention": true,
45
+ "norm_type": "ada_norm",
46
+ "num_attention_heads": 32,
47
+ "num_layers": 16,
48
+ "output_dim": 1024,
49
+ "positional_embeddings": null
50
+ },
51
+ "hidden_size": 1024,
52
+ "input_embedding_dim": 1536,
53
+ "max_action_dim": 32,
54
+ "max_state_dim": 64,
55
+ "model_dtype": "float32",
56
+ "noise_beta_alpha": 1.5,
57
+ "noise_beta_beta": 1.0,
58
+ "noise_s": 0.999,
59
+ "num_inference_timesteps": 4,
60
+ "num_target_vision_tokens": 32,
61
+ "num_timestep_buckets": 1000,
62
+ "tune_diffusion_model": true,
63
+ "tune_projector": true,
64
+ "use_vlln": true,
65
+ "vl_self_attention_cfg": {
66
+ "attention_head_dim": 64,
67
+ "dropout": 0.2,
68
+ "final_dropout": true,
69
+ "num_attention_heads": 32,
70
+ "num_layers": 4,
71
+ "positional_embeddings": null
72
+ }
73
+ },
74
+ "action_horizon": 16,
75
+ "architectures": [
76
+ "GR00T_N1_5"
77
+ ],
78
+ "attn_implementation": null,
79
+ "backbone_cfg": {
80
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
81
+ "load_bf16": false,
82
+ "project_to_dim": null,
83
+ "reproject_vision": false,
84
+ "select_layer": 12,
85
+ "tune_llm": false,
86
+ "tune_visual": true,
87
+ "use_flash_attention": true
88
+ },
89
+ "compute_dtype": "bfloat16",
90
+ "hidden_size": 2048,
91
+ "model_dtype": "float32",
92
+ "model_type": "gr00t_n1_5",
93
+ "torch_dtype": "bfloat16",
94
+ "transformers_version": "4.51.3"
95
+ }
96
+ ```
97
+
98
+ ## Usage
99
+
100
+ This model can be used with the GR00T inference framework:
101
+
102
+ ```python
103
+ # Example usage (adjust based on your specific setup)
104
+ from gr00t_inference import GR00TInference
105
+
106
+ model = GR00TInference(
107
+ model_path="path/to/this/model",
108
+ embodiment_tag="new_embodiment",
109
+ data_config="so100_dualcam"
110
+ )
111
+
112
+ # Use for inference
113
+ results = model.infer(your_input_data)
114
+ ```
115
+
116
+ ## Training Metadata
117
+
118
+ {
119
+ "new_embodiment": {
120
+ "statistics": {
121
+ "state": {
122
+ "single_arm": {
123
+ "max": [
124
+ 72.46653747558594,
125
+ 62.818336486816406,
126
+ 99.72752380371094,
127
+ 99.39103698730469,
128
+ -46.26399230957031
129
+ ],
130
+ "min": [
131
+ -86.99808502197266,
132
+ -99.32088470458984,
133
+ -97.72933959960938,
134
+ -87.64680480957031,
135
+ -65.0611801147461
136
+ ],
137
+ "mean": [
138
+ -7.457055568695068,
139
+ -25.479028701782227,
140
+ 32.967071533203125,
141
+ 35.0267333984375,
142
+ -55.26940155029297
143
+ ],
144
+ "std": [
145
+ 20.533525466918945,
146
+ 50.98550033569336,
147
+ 50.28582763671875,
148
+ 45.0773811340332,
149
+ 2.7385220527648926
150
+ ],
151
+ "q01": [
152
+ -75.78075408935547,
153
+ -99.1511001586914,
154
+ -95.18619537353516,
155
+ -62.41844177246094,
156
+ -61.2080192565918
157
+ ],
158
+ "q99": [
159
+ 33.20586395263672,
160
+ 55.67232688903806,
161
+ 99.54586791992188,
162
+ 99.30404663085938,
163
+ -48.86748123168945
164
+ ]
165
+ },
166
+ "gripper": {
167
+ "max": [
168
+ 49.49358367919922
169
+ ],
170
+ "min": [
171
+ 1.3504388332366943
172
+ ],
173
+ "mean": [
174
+ 11.123491287231445
175
+ ],
176
+ "std": [
177
+ 10.017578125
178
+ ],
179
+ "q01": [
180
+ 1.3504388332366943
181
+ ],
182
+ "q99": [
183
+ 40.64821243286133
184
+ ]
185
+ }
186
+ },
187
+ "action": {
188
+ "single_arm": {
189
+ "max": [
190
+ 73.06226348876953,
191
+ 62.077701568603516,
192
+ 99.81908416748047,
193
+ 100.0,
194
+ -46.0078010559082
195
+ ],
196
+ "min": [
197
+ -87.29351806640625,
198
+ -100.0,
199
+ -99.81908416748047,
200
+ -91.41742706298828,
201
+ -65.25357818603516
202
+ ],
203
+ "mean": [
204
+ -7.188200950622559,
205
+ -26.144899368286133,
206
+ 31.129091262817383,
207
+ 34.6439094543457,
208
+ -55.28120803833008
209
+ ],
210
+ "std": [
211
+ 20.539134979248047,
212
+ 50.40521240234375,
213
+ 50.696495056152344,
214
+ 45.221248626708984,
215
+ 2.745452642440796
216
+ ],
217
+ "q01": [
218
+ -75.47649383544922,
219
+ -99.49324035644531,
220
+ -96.72727142333984,
221
+ -62.808841705322266,
222
+ -61.508453369140625
223
+ ],
224
+ "q99": [
225
+ 33.67217254638672,
226
+ 54.47635269165039,
227
+ 99.63817596435547,
228
+ 99.56653594970703,
229
+ -48.920677185058594
230
+ ]
231
+ },
232
+ "gripper": {
233
+ "max": [
234
+ 49.88161087036133
235
+ ],
236
+ "min": [
237
+ 0.23677979409694672
238
+ ],
239
+ "mean": [
240
+ 9.19546890258789
241
+ ],
242
+ "std": [
243
+ 10.420595169067383
244
+ ],
245
+ "q01": [
246
+ 1.262825608253479
247
+ ],
248
+ "q99": [
249
+ 40.64719772338867
250
+ ]
251
+ }
252
+ }
253
+ },
254
+ "modalities": {
255
+ "video": {
256
+ "front": {
257
+ "resolution": [
258
+ 640,
259
+ 480
260
+ ],
261
+ "channels": 3,
262
+ "fps": 30.0
263
+ },
264
+ "wrist": {
265
+ "resolution": [
266
+ 640,
267
+ 480
268
+ ],
269
+ "channels": 3,
270
+ "fps": 30.0
271
+ }
272
+ },
273
+ "state": {
274
+ "single_arm": {
275
+ "absolute": true,
276
+ "rotation_type": null,
277
+ "shape": [
278
+ 5
279
+ ],
280
+ "continuous": true
281
+ },
282
+ "gripper": {
283
+ "absolute": true,
284
+ "rotation_type": null,
285
+ "shape": [
286
+ 1
287
+ ],
288
+ "continuous": true
289
+ }
290
+ },
291
+ "action": {
292
+ "single_arm": {
293
+ "absolute": true,
294
+ "rotation_type": null,
295
+ "shape": [
296
+ 5
297
+ ],
298
+ "continuous": true
299
+ },
300
+ "gripper": {
301
+ "absolute": true,
302
+ "rotation_type": null,
303
+ "shape": [
304
+ 1
305
+ ],
306
+ "continuous": true
307
+ }
308
+ }
309
+ },
310
+ "embodiment_tag": "new_embodiment"
311
+ }
312
+ }
313
+
314
+ ## Files
315
+
316
+ - `config.json`: Model configuration
317
+ - `model-*.safetensors`: Model weights in SafeTensors format
318
+ - `model.safetensors.index.json`: Model sharding index
319
+ - `experiment_cfg/metadata.json`: Training experiment metadata
320
+
321
+ ## License
322
+
323
+ This model is released under the MIT license.
checkpoint-100000/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
checkpoint-100000/experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 72.46653747558594,
8
+ 62.818336486816406,
9
+ 99.72752380371094,
10
+ 99.39103698730469,
11
+ -46.26399230957031
12
+ ],
13
+ "min": [
14
+ -86.99808502197266,
15
+ -99.32088470458984,
16
+ -97.72933959960938,
17
+ -87.64680480957031,
18
+ -65.0611801147461
19
+ ],
20
+ "mean": [
21
+ -7.457055568695068,
22
+ -25.479028701782227,
23
+ 32.967071533203125,
24
+ 35.0267333984375,
25
+ -55.26940155029297
26
+ ],
27
+ "std": [
28
+ 20.533525466918945,
29
+ 50.98550033569336,
30
+ 50.28582763671875,
31
+ 45.0773811340332,
32
+ 2.7385220527648926
33
+ ],
34
+ "q01": [
35
+ -75.78075408935547,
36
+ -99.1511001586914,
37
+ -95.18619537353516,
38
+ -62.41844177246094,
39
+ -61.2080192565918
40
+ ],
41
+ "q99": [
42
+ 33.20586395263672,
43
+ 55.67232688903806,
44
+ 99.54586791992188,
45
+ 99.30404663085938,
46
+ -48.86748123168945
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 49.49358367919922
52
+ ],
53
+ "min": [
54
+ 1.3504388332366943
55
+ ],
56
+ "mean": [
57
+ 11.123491287231445
58
+ ],
59
+ "std": [
60
+ 10.017578125
61
+ ],
62
+ "q01": [
63
+ 1.3504388332366943
64
+ ],
65
+ "q99": [
66
+ 40.64821243286133
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 73.06226348876953,
74
+ 62.077701568603516,
75
+ 99.81908416748047,
76
+ 100.0,
77
+ -46.0078010559082
78
+ ],
79
+ "min": [
80
+ -87.29351806640625,
81
+ -100.0,
82
+ -99.81908416748047,
83
+ -91.41742706298828,
84
+ -65.25357818603516
85
+ ],
86
+ "mean": [
87
+ -7.188200950622559,
88
+ -26.144899368286133,
89
+ 31.129091262817383,
90
+ 34.6439094543457,
91
+ -55.28120803833008
92
+ ],
93
+ "std": [
94
+ 20.539134979248047,
95
+ 50.40521240234375,
96
+ 50.696495056152344,
97
+ 45.221248626708984,
98
+ 2.745452642440796
99
+ ],
100
+ "q01": [
101
+ -75.47649383544922,
102
+ -99.49324035644531,
103
+ -96.72727142333984,
104
+ -62.808841705322266,
105
+ -61.508453369140625
106
+ ],
107
+ "q99": [
108
+ 33.67217254638672,
109
+ 54.47635269165039,
110
+ 99.63817596435547,
111
+ 99.56653594970703,
112
+ -48.920677185058594
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 49.88161087036133
118
+ ],
119
+ "min": [
120
+ 0.23677979409694672
121
+ ],
122
+ "mean": [
123
+ 9.19546890258789
124
+ ],
125
+ "std": [
126
+ 10.420595169067383
127
+ ],
128
+ "q01": [
129
+ 1.262825608253479
130
+ ],
131
+ "q99": [
132
+ 40.64719772338867
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "front": {
140
+ "resolution": [
141
+ 640,
142
+ 480
143
+ ],
144
+ "channels": 3,
145
+ "fps": 30.0
146
+ },
147
+ "wrist": {
148
+ "resolution": [
149
+ 640,
150
+ 480
151
+ ],
152
+ "channels": 3,
153
+ "fps": 30.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
checkpoint-100000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a6f9f8ff9da38fff5a4c138e98ffff82e303065d1daff25aab9b734c4e05148
3
+ size 4999367032
checkpoint-100000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cdf0828c4e8f5bfc27457cd27734c6b11cbcae692c8993fcbbb1b71af57717
3
+ size 2586705312
checkpoint-100000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200000/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
checkpoint-200000/experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 72.46653747558594,
8
+ 62.818336486816406,
9
+ 99.72752380371094,
10
+ 99.39103698730469,
11
+ -46.26399230957031
12
+ ],
13
+ "min": [
14
+ -86.99808502197266,
15
+ -99.32088470458984,
16
+ -97.72933959960938,
17
+ -87.64680480957031,
18
+ -65.0611801147461
19
+ ],
20
+ "mean": [
21
+ -7.457055568695068,
22
+ -25.479028701782227,
23
+ 32.967071533203125,
24
+ 35.0267333984375,
25
+ -55.26940155029297
26
+ ],
27
+ "std": [
28
+ 20.533525466918945,
29
+ 50.98550033569336,
30
+ 50.28582763671875,
31
+ 45.0773811340332,
32
+ 2.7385220527648926
33
+ ],
34
+ "q01": [
35
+ -75.78075408935547,
36
+ -99.1511001586914,
37
+ -95.18619537353516,
38
+ -62.41844177246094,
39
+ -61.2080192565918
40
+ ],
41
+ "q99": [
42
+ 33.20586395263672,
43
+ 55.67232688903806,
44
+ 99.54586791992188,
45
+ 99.30404663085938,
46
+ -48.86748123168945
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 49.49358367919922
52
+ ],
53
+ "min": [
54
+ 1.3504388332366943
55
+ ],
56
+ "mean": [
57
+ 11.123491287231445
58
+ ],
59
+ "std": [
60
+ 10.017578125
61
+ ],
62
+ "q01": [
63
+ 1.3504388332366943
64
+ ],
65
+ "q99": [
66
+ 40.64821243286133
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 73.06226348876953,
74
+ 62.077701568603516,
75
+ 99.81908416748047,
76
+ 100.0,
77
+ -46.0078010559082
78
+ ],
79
+ "min": [
80
+ -87.29351806640625,
81
+ -100.0,
82
+ -99.81908416748047,
83
+ -91.41742706298828,
84
+ -65.25357818603516
85
+ ],
86
+ "mean": [
87
+ -7.188200950622559,
88
+ -26.144899368286133,
89
+ 31.129091262817383,
90
+ 34.6439094543457,
91
+ -55.28120803833008
92
+ ],
93
+ "std": [
94
+ 20.539134979248047,
95
+ 50.40521240234375,
96
+ 50.696495056152344,
97
+ 45.221248626708984,
98
+ 2.745452642440796
99
+ ],
100
+ "q01": [
101
+ -75.47649383544922,
102
+ -99.49324035644531,
103
+ -96.72727142333984,
104
+ -62.808841705322266,
105
+ -61.508453369140625
106
+ ],
107
+ "q99": [
108
+ 33.67217254638672,
109
+ 54.47635269165039,
110
+ 99.63817596435547,
111
+ 99.56653594970703,
112
+ -48.920677185058594
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 49.88161087036133
118
+ ],
119
+ "min": [
120
+ 0.23677979409694672
121
+ ],
122
+ "mean": [
123
+ 9.19546890258789
124
+ ],
125
+ "std": [
126
+ 10.420595169067383
127
+ ],
128
+ "q01": [
129
+ 1.262825608253479
130
+ ],
131
+ "q99": [
132
+ 40.64719772338867
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "front": {
140
+ "resolution": [
141
+ 640,
142
+ 480
143
+ ],
144
+ "channels": 3,
145
+ "fps": 30.0
146
+ },
147
+ "wrist": {
148
+ "resolution": [
149
+ 640,
150
+ 480
151
+ ],
152
+ "channels": 3,
153
+ "fps": 30.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
checkpoint-200000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f93439a42eae276312ea1f0452966edf951160315b50100ce0585fb90217e4
3
+ size 4999367032
checkpoint-200000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2243d652e92631432c113c3ccdc90d77c5204efd4f82dcde260650ce42c3510e
3
+ size 2586705312
checkpoint-200000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
experiment_cfg/metadata.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "single_arm": {
6
+ "max": [
7
+ 72.46653747558594,
8
+ 62.818336486816406,
9
+ 99.72752380371094,
10
+ 99.39103698730469,
11
+ -46.26399230957031
12
+ ],
13
+ "min": [
14
+ -86.99808502197266,
15
+ -99.32088470458984,
16
+ -97.72933959960938,
17
+ -87.64680480957031,
18
+ -65.0611801147461
19
+ ],
20
+ "mean": [
21
+ -7.457055568695068,
22
+ -25.479028701782227,
23
+ 32.967071533203125,
24
+ 35.0267333984375,
25
+ -55.26940155029297
26
+ ],
27
+ "std": [
28
+ 20.533525466918945,
29
+ 50.98550033569336,
30
+ 50.28582763671875,
31
+ 45.0773811340332,
32
+ 2.7385220527648926
33
+ ],
34
+ "q01": [
35
+ -75.78075408935547,
36
+ -99.1511001586914,
37
+ -95.18619537353516,
38
+ -62.41844177246094,
39
+ -61.2080192565918
40
+ ],
41
+ "q99": [
42
+ 33.20586395263672,
43
+ 55.67232688903806,
44
+ 99.54586791992188,
45
+ 99.30404663085938,
46
+ -48.86748123168945
47
+ ]
48
+ },
49
+ "gripper": {
50
+ "max": [
51
+ 49.49358367919922
52
+ ],
53
+ "min": [
54
+ 1.3504388332366943
55
+ ],
56
+ "mean": [
57
+ 11.123491287231445
58
+ ],
59
+ "std": [
60
+ 10.017578125
61
+ ],
62
+ "q01": [
63
+ 1.3504388332366943
64
+ ],
65
+ "q99": [
66
+ 40.64821243286133
67
+ ]
68
+ }
69
+ },
70
+ "action": {
71
+ "single_arm": {
72
+ "max": [
73
+ 73.06226348876953,
74
+ 62.077701568603516,
75
+ 99.81908416748047,
76
+ 100.0,
77
+ -46.0078010559082
78
+ ],
79
+ "min": [
80
+ -87.29351806640625,
81
+ -100.0,
82
+ -99.81908416748047,
83
+ -91.41742706298828,
84
+ -65.25357818603516
85
+ ],
86
+ "mean": [
87
+ -7.188200950622559,
88
+ -26.144899368286133,
89
+ 31.129091262817383,
90
+ 34.6439094543457,
91
+ -55.28120803833008
92
+ ],
93
+ "std": [
94
+ 20.539134979248047,
95
+ 50.40521240234375,
96
+ 50.696495056152344,
97
+ 45.221248626708984,
98
+ 2.745452642440796
99
+ ],
100
+ "q01": [
101
+ -75.47649383544922,
102
+ -99.49324035644531,
103
+ -96.72727142333984,
104
+ -62.808841705322266,
105
+ -61.508453369140625
106
+ ],
107
+ "q99": [
108
+ 33.67217254638672,
109
+ 54.47635269165039,
110
+ 99.63817596435547,
111
+ 99.56653594970703,
112
+ -48.920677185058594
113
+ ]
114
+ },
115
+ "gripper": {
116
+ "max": [
117
+ 49.88161087036133
118
+ ],
119
+ "min": [
120
+ 0.23677979409694672
121
+ ],
122
+ "mean": [
123
+ 9.19546890258789
124
+ ],
125
+ "std": [
126
+ 10.420595169067383
127
+ ],
128
+ "q01": [
129
+ 1.262825608253479
130
+ ],
131
+ "q99": [
132
+ 40.64719772338867
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "modalities": {
138
+ "video": {
139
+ "front": {
140
+ "resolution": [
141
+ 640,
142
+ 480
143
+ ],
144
+ "channels": 3,
145
+ "fps": 30.0
146
+ },
147
+ "wrist": {
148
+ "resolution": [
149
+ 640,
150
+ 480
151
+ ],
152
+ "channels": 3,
153
+ "fps": 30.0
154
+ }
155
+ },
156
+ "state": {
157
+ "single_arm": {
158
+ "absolute": true,
159
+ "rotation_type": null,
160
+ "shape": [
161
+ 5
162
+ ],
163
+ "continuous": true
164
+ },
165
+ "gripper": {
166
+ "absolute": true,
167
+ "rotation_type": null,
168
+ "shape": [
169
+ 1
170
+ ],
171
+ "continuous": true
172
+ }
173
+ },
174
+ "action": {
175
+ "single_arm": {
176
+ "absolute": true,
177
+ "rotation_type": null,
178
+ "shape": [
179
+ 5
180
+ ],
181
+ "continuous": true
182
+ },
183
+ "gripper": {
184
+ "absolute": true,
185
+ "rotation_type": null,
186
+ "shape": [
187
+ 1
188
+ ],
189
+ "continuous": true
190
+ }
191
+ }
192
+ },
193
+ "embodiment_tag": "new_embodiment"
194
+ }
195
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f93439a42eae276312ea1f0452966edf951160315b50100ce0585fb90217e4
3
+ size 4999367032
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2243d652e92631432c113c3ccdc90d77c5204efd4f82dcde260650ce42c3510e
3
+ size 2586705312
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff