Robotics
Safetensors
gr00t_n1_5
youliangt commited on
Commit
9736835
·
verified ·
1 Parent(s): 5841c7e

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ trainer_state.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "flare_layer": 12,
21
+ "flare_loss_coeff": 0.1,
22
+ "hidden_size": 1024,
23
+ "input_embedding_dim": 1536,
24
+ "max_action_dim": 32,
25
+ "max_state_dim": 64,
26
+ "model_dtype": "float32",
27
+ "noise_beta_alpha": 1.5,
28
+ "noise_beta_beta": 1.0,
29
+ "noise_s": 0.999,
30
+ "num_inference_timesteps": 4,
31
+ "num_target_vision_tokens": 32,
32
+ "num_timestep_buckets": 1000,
33
+ "tune_diffusion_model": true,
34
+ "tune_projector": true,
35
+ "use_vlln": true,
36
+ "vl_self_attention_cfg": {
37
+ "attention_head_dim": 64,
38
+ "dropout": 0.2,
39
+ "final_dropout": true,
40
+ "num_attention_heads": 32,
41
+ "num_layers": 4,
42
+ "positional_embeddings": null
43
+ }
44
+ },
45
+ "action_horizon": 16,
46
+ "architectures": [
47
+ "GR00T_N1_5"
48
+ ],
49
+ "backbone_cfg": {
50
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
51
+ "load_bf16": false,
52
+ "project_to_dim": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 12,
55
+ "tune_llm": false,
56
+ "tune_visual": true,
57
+ "use_flash_attention": true
58
+ },
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.1"
64
+ }
config_backup.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "_convert_": "object",
5
+ "_target_": "groot.model.dual_brain.module.action_head.diffusion_policy.groot_n1_5_flare.XEmbFlowmatchingWMPolicyHead",
6
+ "config": {
7
+ "_recursive_": false,
8
+ "_target_": "groot.model.dual_brain.module.action_head.diffusion_policy.groot_n1_5_flare.XEmbFlowmatchingWMPolicyHeadConfig",
9
+ "action_dim": 32,
10
+ "action_horizon": 16,
11
+ "add_pos_embed": true,
12
+ "backbone_embedding_dim": 2048,
13
+ "diffusion_model_cfg": {
14
+ "_target_": "groot.model.dual_brain.module.action_head.diffusion_policy.module.cross_attention_dit.DiT",
15
+ "attention_head_dim": 48,
16
+ "cross_attention_dim": 2048,
17
+ "dropout": 0.2,
18
+ "final_dropout": true,
19
+ "interleave_self_attention": true,
20
+ "norm_type": "ada_norm",
21
+ "num_attention_heads": 32,
22
+ "num_layers": 16,
23
+ "output_dim": 1024,
24
+ "positional_embeddings": null
25
+ },
26
+ "flare_layer": 12,
27
+ "flare_loss_coeff": 0.1,
28
+ "hidden_size": 1024,
29
+ "input_embedding_dim": 1536,
30
+ "max_action_dim": 32,
31
+ "max_state_dim": 64,
32
+ "model_dtype": "float32",
33
+ "noise_beta_alpha": 1.5,
34
+ "noise_beta_beta": 1.0,
35
+ "noise_s": 0.999,
36
+ "num_inference_timesteps": 4,
37
+ "num_target_vision_tokens": 32,
38
+ "num_timestep_buckets": 1000,
39
+ "pretrained_vl_tower_cfg": {
40
+ "_target_": "groot.model.dual_brain.module.action_head.diffusion_policy.module.qformer_vision_language_tower.Siglip2VLQFormer.from_pretrained",
41
+ "clamp": false,
42
+ "normalization_mode": null,
43
+ "pretrained_model_name_or_path": "/mnt/amlfs-02/shared/ruijiez/checkpoints/siglip2_vl_qformer_grootn1_5",
44
+ "vision_tower_stats_path": null
45
+ },
46
+ "tune_diffusion_model": true,
47
+ "tune_projector": true,
48
+ "use_vlln": true,
49
+ "vl_self_attention_cfg": {
50
+ "_target_": "groot.model.dual_brain.module.action_head.diffusion_policy.module.cross_attention_dit.SelfAttentionTransformer",
51
+ "attention_head_dim": 64,
52
+ "dropout": 0.2,
53
+ "final_dropout": true,
54
+ "num_attention_heads": 32,
55
+ "num_layers": 4,
56
+ "positional_embeddings": null
57
+ }
58
+ }
59
+ },
60
+ "action_horizon": 16,
61
+ "architectures": [
62
+ "DualBrain"
63
+ ],
64
+ "backbone_cfg": {
65
+ "_target_": "groot.model.dual_brain.module.backbone.eagle2d5.Eagle2d5VLBackbone",
66
+ "eagle_path": "NVEagle/eagle_er-qwen3_1.7B-Siglip2_400M_stage1_5_128gpu_er_v6_1mlp_nops",
67
+ "load_bf16": false,
68
+ "project_to_dim": null,
69
+ "reproject_vision": false,
70
+ "select_layer": 12,
71
+ "tune_llm": false,
72
+ "tune_visual": true,
73
+ "use_flash_attention": true
74
+ },
75
+ "hidden_size": 2048,
76
+ "model_dtype": "float32",
77
+ "model_type": "dual_brain",
78
+ "resume_path": "/mnt/amlfs-02/shared/ruijiez/checkpoints/flare_groot_n1_5_300k_steps_0509",
79
+ "torch_dtype": "bfloat16",
80
+ "transformers_version": "4.51.1"
81
+ }
experiment_cfg/metadata.json ADDED
@@ -0,0 +1,2076 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gr1": {
3
+ "statistics": {
4
+ "state": {
5
+ "left_arm": {
6
+ "max": [
7
+ 1.3570959568023682,
8
+ 1.5781080722808838,
9
+ 1.7425614595413208,
10
+ 0.04977008327841759,
11
+ 2.521491289138794,
12
+ 1.5104753971099854,
13
+ 1.3451268672943115
14
+ ],
15
+ "min": [
16
+ -1.7593636512756348,
17
+ -0.08899051696062088,
18
+ -1.6571800708770752,
19
+ -2.516343355178833,
20
+ -2.307133436203003,
21
+ -1.4132264852523804,
22
+ -1.4408576488494873
23
+ ],
24
+ "mean": [
25
+ -0.1721954196691514,
26
+ 0.18329788744449615,
27
+ -0.1360361576080323,
28
+ -1.0807313919067383,
29
+ 0.14450088143348713,
30
+ 0.13195440173149092,
31
+ -0.08306947350502007
32
+ ],
33
+ "std": [
34
+ 0.4068214595317843,
35
+ 0.18259239196777355,
36
+ 0.2513278424739838,
37
+ 0.5786943435668955,
38
+ 0.33050763607025135,
39
+ 0.28609949350357067,
40
+ 0.3013432323932649
41
+ ],
42
+ "q01": [
43
+ -1.6011584222316746,
44
+ -0.00048318122862838164,
45
+ -0.9488833963871,
46
+ -2.4362342357635502,
47
+ -0.9403592050075524,
48
+ -0.6489510446786882,
49
+ -0.9641011685132993
50
+ ],
51
+ "q99": [
52
+ 0.6305126994848259,
53
+ 0.7966297268867486,
54
+ 0.3873166218399999,
55
+ -0.07399308793246723,
56
+ 1.0455405175685892,
57
+ 0.8574009954929348,
58
+ 0.4838866069912913
59
+ ]
60
+ },
61
+ "left_hand": {
62
+ "max": [
63
+ 2.8765757083892822,
64
+ 2.6975045204162598,
65
+ 3.3291828632354736,
66
+ 2.7148845195770264,
67
+ 3.664421558380127,
68
+ 3.5466625690460205
69
+ ],
70
+ "min": [
71
+ -3.262805700302124,
72
+ -2.7771592140197754,
73
+ -3.4712820053100586,
74
+ -2.7828404903411865,
75
+ -2.197380781173706,
76
+ -2.636387825012207
77
+ ],
78
+ "mean": [
79
+ 0.21940384805202476,
80
+ 0.21819256246089933,
81
+ 0.20426052808761605,
82
+ 0.22833923995494854,
83
+ 0.06841608136892328,
84
+ 0.9091449975967411
85
+ ],
86
+ "std": [
87
+ 0.42784574627876293,
88
+ 0.4130409955978396,
89
+ 0.3710329830646513,
90
+ 0.4229725301265714,
91
+ 0.18603332340717335,
92
+ 0.7297042012214684
93
+ ],
94
+ "q01": [
95
+ -0.17130047902464876,
96
+ -0.0025680272374302166,
97
+ -0.0029985445667989555,
98
+ -0.002490740863140675,
99
+ -0.14894675761461273,
100
+ -0.00027310136792948466
101
+ ],
102
+ "q99": [
103
+ 1.5014824867248553,
104
+ 1.501653671264647,
105
+ 1.4043268024921445,
106
+ 1.5127789616584784,
107
+ 0.781709930300714,
108
+ 1.872669064998624
109
+ ]
110
+ },
111
+ "left_leg": {
112
+ "max": [
113
+ 0.0,
114
+ 0.0,
115
+ 0.0,
116
+ 0.0,
117
+ 5.186659473110922e-05,
118
+ 0.0
119
+ ],
120
+ "min": [
121
+ 0.0,
122
+ 0.0,
123
+ 0.0,
124
+ 0.0,
125
+ -2.446335656713927e-06,
126
+ -1.6033794963732362e-05
127
+ ],
128
+ "mean": [
129
+ 0.0,
130
+ 0.0,
131
+ 0.0,
132
+ 0.0,
133
+ 7.689799531362961e-06,
134
+ -2.635196324263233e-06
135
+ ],
136
+ "std": [
137
+ 0.0,
138
+ 0.0,
139
+ 0.0,
140
+ 0.0,
141
+ 1.7361962818540636e-05,
142
+ 5.299682925397068e-06
143
+ ],
144
+ "q01": [
145
+ 0.0,
146
+ 0.0,
147
+ 0.0,
148
+ 0.0,
149
+ -2.4463356567139293e-06,
150
+ -1.6033794963732362e-05
151
+ ],
152
+ "q99": [
153
+ 0.0,
154
+ 0.0,
155
+ 0.0,
156
+ 0.0,
157
+ 5.186659473110924e-05,
158
+ 0.0
159
+ ]
160
+ },
161
+ "neck": {
162
+ "max": [
163
+ 0.01535311620682478,
164
+ 0.05556071177124977,
165
+ 0.008969387039542198
166
+ ],
167
+ "min": [
168
+ -0.23586410284042358,
169
+ -0.03865945711731911,
170
+ -0.06754876673221588
171
+ ],
172
+ "mean": [
173
+ -0.0008622645400464534,
174
+ 5.395126208895819e-05,
175
+ -0.00013521323853638004
176
+ ],
177
+ "std": [
178
+ 0.011963724158704286,
179
+ 0.003479051403701308,
180
+ 0.003105328651145102
181
+ ],
182
+ "q01": [
183
+ -0.004923884291201831,
184
+ -0.0027977502904832337,
185
+ 0.0
186
+ ],
187
+ "q99": [
188
+ 2.992488589370624e-06,
189
+ 0.0002969176275655626,
190
+ 0.0
191
+ ]
192
+ },
193
+ "right_arm": {
194
+ "max": [
195
+ 1.7399436235427856,
196
+ 0.07876992970705032,
197
+ 2.5426626205444336,
198
+ 0.048908114433288574,
199
+ 3.000918388366699,
200
+ 1.4513440132141113,
201
+ 1.5000383853912354
202
+ ],
203
+ "min": [
204
+ -2.2743337154388428,
205
+ -2.999122142791748,
206
+ -1.7424250841140747,
207
+ -2.536545753479004,
208
+ -2.6034910678863525,
209
+ -1.5032434463500977,
210
+ -1.4913568496704102
211
+ ],
212
+ "mean": [
213
+ -0.1590277999639513,
214
+ -0.2511773109436038,
215
+ 0.08599518239498137,
216
+ -1.1420482397079474,
217
+ 0.21259814500808735,
218
+ 0.04237671568989762,
219
+ 0.03427042067050932
220
+ ],
221
+ "std": [
222
+ 0.5631562471389767,
223
+ 0.24450603127479503,
224
+ 0.3282549381256103,
225
+ 0.7310536503791792,
226
+ 0.48139148950576843,
227
+ 0.35611921548843384,
228
+ 0.5090531110763546
229
+ ],
230
+ "q01": [
231
+ -1.4233640372753156,
232
+ -1.0436977982521036,
233
+ -0.7169023394584662,
234
+ -2.4936094284057644,
235
+ -0.7377233326435084,
236
+ -0.8036172896623616,
237
+ -0.9991738587617871
238
+ ],
239
+ "q99": [
240
+ 1.167767405509949,
241
+ 0.0006303858070168673,
242
+ 0.8771779745817198,
243
+ -0.0582109544426202,
244
+ 1.470731711387636,
245
+ 0.8474899172782918,
246
+ 1.2271583080291764
247
+ ]
248
+ },
249
+ "right_hand": {
250
+ "max": [
251
+ 2.397510290145874,
252
+ 2.4422738552093506,
253
+ 3.0158462524414062,
254
+ 2.457378625869751,
255
+ 1.4605257511138916,
256
+ 2.168534278869629
257
+ ],
258
+ "min": [
259
+ -1.7595213651657104,
260
+ -1.8982670307159424,
261
+ -1.8622381687164307,
262
+ -1.8314869403839111,
263
+ -1.0045679807662964,
264
+ -0.31411829590797424
265
+ ],
266
+ "mean": [
267
+ 0.34993353486061096,
268
+ 0.3412855267524723,
269
+ 0.3271332383155817,
270
+ 0.3575766980648041,
271
+ 0.09723818302154554,
272
+ 1.0641368627548227
273
+ ],
274
+ "std": [
275
+ 0.49147421121597257,
276
+ 0.46545258164405845,
277
+ 0.44516301155090443,
278
+ 0.4866269826889039,
279
+ 0.1814946085214615,
280
+ 0.5735033154487607
281
+ ],
282
+ "q01": [
283
+ -0.004218762856908145,
284
+ -0.004575904295779761,
285
+ -0.005576773989014321,
286
+ -0.0047070790780708175,
287
+ -0.18967307358980165,
288
+ 0.03200087323784828
289
+ ],
290
+ "q99": [
291
+ 1.4994217157363876,
292
+ 1.5029035806655904,
293
+ 1.6614551365375507,
294
+ 1.5188306391239186,
295
+ 0.6953653633594535,
296
+ 1.8182492077350652
297
+ ]
298
+ },
299
+ "right_leg": {
300
+ "max": [
301
+ 0.0,
302
+ 0.0,
303
+ 0.0,
304
+ 0.0,
305
+ 5.369959126255708e-06,
306
+ 0.0
307
+ ],
308
+ "min": [
309
+ 0.0,
310
+ 0.0,
311
+ 0.0,
312
+ 0.0,
313
+ -2.446335656713927e-06,
314
+ -5.166131450096145e-05
315
+ ],
316
+ "mean": [
317
+ 0.0,
318
+ 0.0,
319
+ 0.0,
320
+ 0.0,
321
+ 8.152188684107393e-07,
322
+ -7.940252544358388e-06
323
+ ],
324
+ "std": [
325
+ 0.0,
326
+ 0.0,
327
+ 0.0,
328
+ 0.0,
329
+ 1.9208146113669496e-06,
330
+ 1.7361962818540626e-05
331
+ ],
332
+ "q01": [
333
+ 0.0,
334
+ 0.0,
335
+ 0.0,
336
+ 0.0,
337
+ -2.4463356567139293e-06,
338
+ -5.166131450096152e-05
339
+ ],
340
+ "q99": [
341
+ 0.0,
342
+ 0.0,
343
+ 0.0,
344
+ 0.0,
345
+ 5.36995912625571e-06,
346
+ 0.0
347
+ ]
348
+ },
349
+ "waist": {
350
+ "max": [
351
+ 1.0323854684829712,
352
+ 0.7102982997894287,
353
+ 0.43762317299842834
354
+ ],
355
+ "min": [
356
+ -0.8131351470947266,
357
+ -0.4901888370513916,
358
+ -0.7304351925849915
359
+ ],
360
+ "mean": [
361
+ 0.014510802924633035,
362
+ 0.014398206025362044,
363
+ -0.00020241182937752458
364
+ ],
365
+ "std": [
366
+ 0.1227026358246804,
367
+ 0.03244836255908008,
368
+ 0.007282953243702651
369
+ ],
370
+ "q01": [
371
+ -0.3365072563290599,
372
+ -0.02904987111687659,
373
+ -0.02606033049523827
374
+ ],
375
+ "q99": [
376
+ 0.48663658648729347,
377
+ 0.12612193301320107,
378
+ 0.022280203178524964
379
+ ]
380
+ }
381
+ },
382
+ "action": {
383
+ "left_arm": {
384
+ "max": [
385
+ 1.4113223552703857,
386
+ 1.8901419639587402,
387
+ 1.7754145860671997,
388
+ 7.309383363462985e-05,
389
+ 2.563594102859497,
390
+ 1.5000243186950684,
391
+ 1.4908421039581299
392
+ ],
393
+ "min": [
394
+ -2.291214942932129,
395
+ -0.001746351015754044,
396
+ -2.4999990463256836,
397
+ -2.814260482788086,
398
+ -2.3611011505126953,
399
+ -1.4937989711761475,
400
+ -1.4969758987426758
401
+ ],
402
+ "mean": [
403
+ -0.17771673202514657,
404
+ 0.18351773917675007,
405
+ -0.13720993697643286,
406
+ -1.0842658281326296,
407
+ 0.14675945043563873,
408
+ 0.12921456992626204,
409
+ -0.09504522383213035
410
+ ],
411
+ "std": [
412
+ 0.4174487590789793,
413
+ 0.18478454649448395,
414
+ 0.25687918066978443,
415
+ 0.5807053446769717,
416
+ 0.33657354116439797,
417
+ 0.29842707514762895,
418
+ 0.31536528468132013
419
+ ],
420
+ "q01": [
421
+ -1.6055770933628097,
422
+ -7.429541994952163e-07,
423
+ -0.9837155520915981,
424
+ -2.480029511451721,
425
+ -0.9457575023174288,
426
+ -0.7192997813224794,
427
+ -1.008323037624359
428
+ ],
429
+ "q99": [
430
+ 0.6335921049118061,
431
+ 0.8018405169248585,
432
+ 0.3930345878005033,
433
+ -0.06366106234490865,
434
+ 1.0508334636688252,
435
+ 0.8918659299612054,
436
+ 0.5138001590967192
437
+ ]
438
+ },
439
+ "left_hand": {
440
+ "max": [
441
+ 1.5707963705062866,
442
+ 1.646651268005371,
443
+ 1.709236979484558,
444
+ 2.0620639324188232,
445
+ 3.0,
446
+ 3.0
447
+ ],
448
+ "min": [
449
+ -1.9597030878067017,
450
+ -1.8635213375091553,
451
+ -1.9709523916244507,
452
+ -1.5,
453
+ -3.0,
454
+ 0.0
455
+ ],
456
+ "mean": [
457
+ -0.22721463441848752,
458
+ -0.21956320106983204,
459
+ -0.2144497036933901,
460
+ -0.21446840465068817,
461
+ -0.48631641268730147,
462
+ 1.6838233470916744
463
+ ],
464
+ "std": [
465
+ 1.0050891637802128,
466
+ 1.0124161243438718,
467
+ 1.0138051509857173,
468
+ 1.0200439691543575,
469
+ 2.066762685775758,
470
+ 1.4588384628295925
471
+ ],
472
+ "q01": [
473
+ -1.4999999999999987,
474
+ -1.4999999999999987,
475
+ -1.4999999999999987,
476
+ -1.4999999999999987,
477
+ -2.9999999999999973,
478
+ 0.0
479
+ ],
480
+ "q99": [
481
+ 1.4999999999999987,
482
+ 1.4999999999999987,
483
+ 1.4999999999999987,
484
+ 1.4999999999999987,
485
+ 2.9999999999999973,
486
+ 2.9999999999999973
487
+ ]
488
+ },
489
+ "left_leg": {
490
+ "max": [
491
+ 0.0,
492
+ 0.0,
493
+ 0.0,
494
+ 0.0,
495
+ 0.0,
496
+ 0.0
497
+ ],
498
+ "min": [
499
+ 0.0,
500
+ 0.0,
501
+ 0.0,
502
+ 0.0,
503
+ 0.0,
504
+ 0.0
505
+ ],
506
+ "mean": [
507
+ 0.0,
508
+ 0.0,
509
+ 0.0,
510
+ 0.0,
511
+ 0.0,
512
+ 0.0
513
+ ],
514
+ "std": [
515
+ 0.0,
516
+ 0.0,
517
+ 0.0,
518
+ 0.0,
519
+ 0.0,
520
+ 0.0
521
+ ],
522
+ "q01": [
523
+ 0.0,
524
+ 0.0,
525
+ 0.0,
526
+ 0.0,
527
+ 0.0,
528
+ 0.0
529
+ ],
530
+ "q99": [
531
+ 0.0,
532
+ 0.0,
533
+ 0.0,
534
+ 0.0,
535
+ 0.0,
536
+ 0.0
537
+ ]
538
+ },
539
+ "neck": {
540
+ "max": [
541
+ 0.0,
542
+ 0.0,
543
+ 0.0
544
+ ],
545
+ "min": [
546
+ 0.0,
547
+ 0.0,
548
+ 0.0
549
+ ],
550
+ "mean": [
551
+ 0.0,
552
+ 0.0,
553
+ 0.0
554
+ ],
555
+ "std": [
556
+ 0.0,
557
+ 0.0,
558
+ 0.0
559
+ ],
560
+ "q01": [
561
+ 0.0,
562
+ 0.0,
563
+ 0.0
564
+ ],
565
+ "q99": [
566
+ 0.0,
567
+ 0.0,
568
+ 0.0
569
+ ]
570
+ },
571
+ "right_arm": {
572
+ "max": [
573
+ 1.7834906578063965,
574
+ 0.0002448999439366162,
575
+ 2.549729585647583,
576
+ 7.45560391806066e-05,
577
+ 3.0000460147857666,
578
+ 1.4975632429122925,
579
+ 1.4998434782028198
580
+ ],
581
+ "min": [
582
+ -2.318650960922241,
583
+ -2.9999561309814453,
584
+ -1.9257696866989136,
585
+ -2.8759055137634277,
586
+ -2.620600461959839,
587
+ -1.5001521110534668,
588
+ -1.4994292259216309
589
+ ],
590
+ "mean": [
591
+ -0.16953834891319267,
592
+ -0.2535267770290373,
593
+ 0.082216002047062,
594
+ -1.1492931842803968,
595
+ 0.2176141738891601,
596
+ 0.04310995712876314,
597
+ 0.04133718833327294
598
+ ],
599
+ "std": [
600
+ 0.5752111673355106,
601
+ 0.25846862792968733,
602
+ 0.34115546941757197,
603
+ 0.7394758462905866,
604
+ 0.4870710670948032,
605
+ 0.3721199631690978,
606
+ 0.5423213839530941
607
+ ],
608
+ "q01": [
609
+ -1.4375487387180332,
610
+ -1.0682010412216194,
611
+ -0.7535711079835895,
612
+ -2.6383612155914276,
613
+ -0.7471688866615306,
614
+ -0.8604200631380092,
615
+ -1.0900398135185252
616
+ ],
617
+ "q99": [
618
+ 1.1753968834876984,
619
+ -9.999999974752431e-07,
620
+ 0.8976036489009875,
621
+ -0.0579521823674439,
622
+ 1.4876463234424584,
623
+ 0.8774339169263857,
624
+ 1.4641386926174185
625
+ ]
626
+ },
627
+ "right_hand": {
628
+ "max": [
629
+ 1.5707963705062866,
630
+ 1.7183797359466553,
631
+ 3.979951858520508,
632
+ 3.9879753589630127,
633
+ 3.0,
634
+ 3.0
635
+ ],
636
+ "min": [
637
+ -1.5,
638
+ -1.5,
639
+ -1.5,
640
+ -1.5,
641
+ -3.0,
642
+ 4.470348358154297e-08
643
+ ],
644
+ "mean": [
645
+ -0.543570637702943,
646
+ -0.5304322242736823,
647
+ -0.5179098844528184,
648
+ -0.4968528747558592,
649
+ -1.1343770027160642,
650
+ 2.128209590911863
651
+ ],
652
+ "std": [
653
+ 1.1248024702072141,
654
+ 1.1392076015472405,
655
+ 1.1426663398742682,
656
+ 1.301862597465516,
657
+ 2.199875354766848,
658
+ 0.9186902642250119
659
+ ],
660
+ "q01": [
661
+ -1.4999999999999987,
662
+ -1.4999999999999987,
663
+ -1.4999999999999987,
664
+ -1.4999999999999987,
665
+ -2.9999999999999973,
666
+ 0.04336242545396091
667
+ ],
668
+ "q99": [
669
+ 1.4999999999999987,
670
+ 1.4999999999999987,
671
+ 1.4999999999999987,
672
+ 1.4999999999999987,
673
+ 2.9999999999999973,
674
+ 2.9999999999999973
675
+ ]
676
+ },
677
+ "right_leg": {
678
+ "max": [
679
+ 0.0,
680
+ 0.0,
681
+ 0.0,
682
+ 0.0,
683
+ 0.0,
684
+ 0.0
685
+ ],
686
+ "min": [
687
+ 0.0,
688
+ 0.0,
689
+ 0.0,
690
+ 0.0,
691
+ 0.0,
692
+ 0.0
693
+ ],
694
+ "mean": [
695
+ 0.0,
696
+ 0.0,
697
+ 0.0,
698
+ 0.0,
699
+ 0.0,
700
+ 0.0
701
+ ],
702
+ "std": [
703
+ 0.0,
704
+ 0.0,
705
+ 0.0,
706
+ 0.0,
707
+ 0.0,
708
+ 0.0
709
+ ],
710
+ "q01": [
711
+ 0.0,
712
+ 0.0,
713
+ 0.0,
714
+ 0.0,
715
+ 0.0,
716
+ 0.0
717
+ ],
718
+ "q99": [
719
+ 0.0,
720
+ 0.0,
721
+ 0.0,
722
+ 0.0,
723
+ 0.0,
724
+ 0.0
725
+ ]
726
+ },
727
+ "waist": {
728
+ "max": [
729
+ 1.049119234085083,
730
+ 0.6198405623435974,
731
+ 0.45177245140075684
732
+ ],
733
+ "min": [
734
+ -0.8292319774627686,
735
+ -0.5185094475746155,
736
+ -0.37811079621315
737
+ ],
738
+ "mean": [
739
+ 0.014836843125522158,
740
+ 0.010428135283291354,
741
+ -0.00014937532250769447
742
+ ],
743
+ "std": [
744
+ 0.12568794190883642,
745
+ 0.030695030465722098,
746
+ 0.004574332851916548
747
+ ],
748
+ "q01": [
749
+ -0.3402548208832736,
750
+ -0.025130789913237084,
751
+ -0.016220059804618367
752
+ ],
753
+ "q99": [
754
+ 0.49598969072103627,
755
+ 0.12137954644858844,
756
+ 0.010726323025301106
757
+ ]
758
+ }
759
+ },
760
+ "total_trajectory_length": 3058702552,
761
+ "num_trajectories": 10987869
762
+ },
763
+ "modalities": {
764
+ "video": {
765
+ "ego_view": {
766
+ "resolution": [
767
+ 256,
768
+ 256
769
+ ],
770
+ "channels": 3,
771
+ "fps": 20.0
772
+ }
773
+ },
774
+ "state": {
775
+ "left_arm": {
776
+ "absolute": true,
777
+ "rotation_type": null,
778
+ "shape": [
779
+ 7
780
+ ],
781
+ "continuous": true
782
+ },
783
+ "left_hand": {
784
+ "absolute": true,
785
+ "rotation_type": null,
786
+ "shape": [
787
+ 6
788
+ ],
789
+ "continuous": true
790
+ },
791
+ "left_leg": {
792
+ "absolute": true,
793
+ "rotation_type": null,
794
+ "shape": [
795
+ 6
796
+ ],
797
+ "continuous": true
798
+ },
799
+ "neck": {
800
+ "absolute": true,
801
+ "rotation_type": null,
802
+ "shape": [
803
+ 3
804
+ ],
805
+ "continuous": true
806
+ },
807
+ "right_arm": {
808
+ "absolute": true,
809
+ "rotation_type": null,
810
+ "shape": [
811
+ 7
812
+ ],
813
+ "continuous": true
814
+ },
815
+ "right_hand": {
816
+ "absolute": true,
817
+ "rotation_type": null,
818
+ "shape": [
819
+ 6
820
+ ],
821
+ "continuous": true
822
+ },
823
+ "right_leg": {
824
+ "absolute": true,
825
+ "rotation_type": null,
826
+ "shape": [
827
+ 6
828
+ ],
829
+ "continuous": true
830
+ },
831
+ "waist": {
832
+ "absolute": true,
833
+ "rotation_type": null,
834
+ "shape": [
835
+ 3
836
+ ],
837
+ "continuous": true
838
+ }
839
+ },
840
+ "action": {
841
+ "left_arm": {
842
+ "absolute": true,
843
+ "rotation_type": null,
844
+ "shape": [
845
+ 7
846
+ ],
847
+ "continuous": true
848
+ },
849
+ "left_hand": {
850
+ "absolute": true,
851
+ "rotation_type": null,
852
+ "shape": [
853
+ 6
854
+ ],
855
+ "continuous": true
856
+ },
857
+ "left_leg": {
858
+ "absolute": true,
859
+ "rotation_type": null,
860
+ "shape": [
861
+ 6
862
+ ],
863
+ "continuous": true
864
+ },
865
+ "neck": {
866
+ "absolute": true,
867
+ "rotation_type": null,
868
+ "shape": [
869
+ 3
870
+ ],
871
+ "continuous": true
872
+ },
873
+ "right_arm": {
874
+ "absolute": true,
875
+ "rotation_type": null,
876
+ "shape": [
877
+ 7
878
+ ],
879
+ "continuous": true
880
+ },
881
+ "right_hand": {
882
+ "absolute": true,
883
+ "rotation_type": null,
884
+ "shape": [
885
+ 6
886
+ ],
887
+ "continuous": true
888
+ },
889
+ "right_leg": {
890
+ "absolute": true,
891
+ "rotation_type": null,
892
+ "shape": [
893
+ 6
894
+ ],
895
+ "continuous": true
896
+ },
897
+ "waist": {
898
+ "absolute": true,
899
+ "rotation_type": null,
900
+ "shape": [
901
+ 3
902
+ ],
903
+ "continuous": true
904
+ }
905
+ },
906
+ "annotation": {
907
+ "human": [
908
+ "action.verb",
909
+ "action.object",
910
+ "action.start_location",
911
+ "action.end_location",
912
+ "action.hand",
913
+ "action.rating",
914
+ "action.failure_code",
915
+ "action.remarks",
916
+ "coarse_action",
917
+ "fine_action",
918
+ "validity"
919
+ ]
920
+ }
921
+ },
922
+ "embodiment_tag": "gr1"
923
+ },
924
+ "oxe_droid": {
925
+ "statistics": {
926
+ "state": {
927
+ "eef_position": {
928
+ "max": [
929
+ 0.8575563430786133,
930
+ 0.8196876049041748,
931
+ 1.0111403465270996
932
+ ],
933
+ "min": [
934
+ -0.22192564606666565,
935
+ -0.827903687953949,
936
+ -0.24001094698905945
937
+ ],
938
+ "mean": [
939
+ 0.5301029682159424,
940
+ 0.0006127175875008106,
941
+ 0.3208114504814148
942
+ ],
943
+ "std": [
944
+ 0.11526046693325043,
945
+ 0.17508766055107117,
946
+ 0.16157574951648712
947
+ ],
948
+ "q01": [
949
+ 0.26736209958791735,
950
+ -0.4401703763008118,
951
+ -0.0427810287475586
952
+ ],
953
+ "q99": [
954
+ 0.7802324891090393,
955
+ 0.4409394279122357,
956
+ 0.782760812044144
957
+ ]
958
+ },
959
+ "eef_rotation": {
960
+ "max": [
961
+ 3.1415927410125732,
962
+ 1.5705928802490234,
963
+ 3.1415927410125732
964
+ ],
965
+ "min": [
966
+ -3.141592502593994,
967
+ -1.5703768730163574,
968
+ -3.141563653945923
969
+ ],
970
+ "mean": [
971
+ 0.3172215521335602,
972
+ -0.09119903296232224,
973
+ -0.05301975458860397
974
+ ],
975
+ "std": [
976
+ 2.781412363052368,
977
+ 0.34485235810279846,
978
+ 0.7496558427810669
979
+ ],
980
+ "q01": [
981
+ -3.1375010013580322,
982
+ -1.2102844715118408,
983
+ -2.1374536204338073
984
+ ],
985
+ "q99": [
986
+ 3.137559413909912,
987
+ 0.8736653280258189,
988
+ 2.0176499319076573
989
+ ]
990
+ },
991
+ "gripper_position": {
992
+ "max": [
993
+ 1.0
994
+ ],
995
+ "min": [
996
+ 0.0
997
+ ],
998
+ "mean": [
999
+ 0.37953677773475647
1000
+ ],
1001
+ "std": [
1002
+ 0.418218731880188
1003
+ ],
1004
+ "q01": [
1005
+ 0.0
1006
+ ],
1007
+ "q99": [
1008
+ 0.9911894202232361
1009
+ ]
1010
+ },
1011
+ "joint_position": {
1012
+ "max": [
1013
+ 2.6687583923339844,
1014
+ 1.5840554237365723,
1015
+ 2.6957037448883057,
1016
+ -0.29779934883117676,
1017
+ 2.6624162197113037,
1018
+ 4.28157901763916,
1019
+ 2.755643367767334
1020
+ ],
1021
+ "min": [
1022
+ -2.664970874786377,
1023
+ -1.6156227588653564,
1024
+ -2.680800676345825,
1025
+ -2.9409868717193604,
1026
+ -2.6705946922302246,
1027
+ 0.24893812835216522,
1028
+ -2.761866807937622
1029
+ ],
1030
+ "mean": [
1031
+ 0.015112307853996754,
1032
+ 0.2475655972957611,
1033
+ -0.015541581436991692,
1034
+ -2.0761396884918213,
1035
+ -0.03723751753568649,
1036
+ 2.5374062061309814,
1037
+ 0.08577312529087067
1038
+ ],
1039
+ "std": [
1040
+ 0.31826168298721313,
1041
+ 0.4946194291114807,
1042
+ 0.2835463881492615,
1043
+ 0.4799663722515106,
1044
+ 0.5096911191940308,
1045
+ 0.4861564040184021,
1046
+ 0.7239381670951843
1047
+ ],
1048
+ "q01": [
1049
+ -0.9358840203285217,
1050
+ -0.8845126914978028,
1051
+ -0.9516998279094696,
1052
+ -2.7735876727104185,
1053
+ -1.8058055281639098,
1054
+ 1.211466405391693,
1055
+ -2.089151668548584
1056
+ ],
1057
+ "q99": [
1058
+ 0.9981055355072064,
1059
+ 1.4211456155776983,
1060
+ 0.8372088992595699,
1061
+ -0.44636924982070836,
1062
+ 1.6697174942493462,
1063
+ 3.470238385200501,
1064
+ 2.2241294598579424
1065
+ ]
1066
+ }
1067
+ },
1068
+ "action": {
1069
+ "eef_position": {
1070
+ "max": [
1071
+ 0.9295652508735657,
1072
+ 0.8648782968521118,
1073
+ 1.074978232383728
1074
+ ],
1075
+ "min": [
1076
+ -0.22296355664730072,
1077
+ -0.8572492003440857,
1078
+ -0.3079752027988434
1079
+ ],
1080
+ "mean": [
1081
+ 0.5333008766174316,
1082
+ 0.00040175949106924236,
1083
+ 0.32263845205307007
1084
+ ],
1085
+ "std": [
1086
+ 0.11593657732009888,
1087
+ 0.17602738738059998,
1088
+ 0.16214896738529205
1089
+ ],
1090
+ "q01": [
1091
+ 0.2653030273318291,
1092
+ -0.44295967668294906,
1093
+ -0.04580880187451839
1094
+ ],
1095
+ "q99": [
1096
+ 0.792060375213623,
1097
+ 0.44292682290077234,
1098
+ 0.7875395911931997
1099
+ ]
1100
+ },
1101
+ "eef_rotation": {
1102
+ "max": [
1103
+ 3.1415927410125732,
1104
+ 1.5702463388442993,
1105
+ 3.1415891647338867
1106
+ ],
1107
+ "min": [
1108
+ -3.141592502593994,
1109
+ -1.570475697517395,
1110
+ -3.1415903568267822
1111
+ ],
1112
+ "mean": [
1113
+ 0.30634260177612305,
1114
+ -0.09434697777032852,
1115
+ -0.05444207414984703
1116
+ ],
1117
+ "std": [
1118
+ 2.7942583560943604,
1119
+ 0.3476769030094147,
1120
+ 0.756449818611145
1121
+ ],
1122
+ "q01": [
1123
+ -3.1374151706695557,
1124
+ -1.2160829985141755,
1125
+ -2.1517505979537965
1126
+ ],
1127
+ "q99": [
1128
+ 3.1374621391296387,
1129
+ 0.8804873836040525,
1130
+ 2.0309575748443613
1131
+ ]
1132
+ },
1133
+ "eef_position_delta": {
1134
+ "max": [
1135
+ 0.9999998211860657,
1136
+ 0.999991774559021,
1137
+ 0.9999973177909851
1138
+ ],
1139
+ "min": [
1140
+ -0.9999999403953552,
1141
+ -0.9999951124191284,
1142
+ -0.9999960660934448
1143
+ ],
1144
+ "mean": [
1145
+ 0.024281756952404976,
1146
+ -0.002875920385122299,
1147
+ 0.018233448266983032
1148
+ ],
1149
+ "std": [
1150
+ 0.25211915373802185,
1151
+ 0.18553106486797333,
1152
+ 0.2270297408103943
1153
+ ],
1154
+ "q01": [
1155
+ -0.7671820366382599,
1156
+ -0.5812531012296677,
1157
+ -0.5680612689256668
1158
+ ],
1159
+ "q99": [
1160
+ 0.7328272181749356,
1161
+ 0.5705102401971829,
1162
+ 0.734496554732323
1163
+ ]
1164
+ },
1165
+ "eef_rotation_delta": {
1166
+ "max": [
1167
+ 0.9999874830245972,
1168
+ 0.9999954104423523,
1169
+ 0.9999998807907104
1170
+ ],
1171
+ "min": [
1172
+ -0.9999980330467224,
1173
+ -0.9999979734420776,
1174
+ -0.9999998807907104
1175
+ ],
1176
+ "mean": [
1177
+ 0.0038623453583568335,
1178
+ -0.03020404651761055,
1179
+ -0.007724778726696968
1180
+ ],
1181
+ "std": [
1182
+ 0.21981821954250336,
1183
+ 0.22736550867557526,
1184
+ 0.29116934537887573
1185
+ ],
1186
+ "q01": [
1187
+ -0.6462401157617569,
1188
+ -0.6986615788936615,
1189
+ -0.8940888077020646
1190
+ ],
1191
+ "q99": [
1192
+ 0.6701276439428347,
1193
+ 0.6458621937036526,
1194
+ 0.8935507464408889
1195
+ ]
1196
+ },
1197
+ "gripper_position": {
1198
+ "max": [
1199
+ 1.0
1200
+ ],
1201
+ "min": [
1202
+ 0.0
1203
+ ],
1204
+ "mean": [
1205
+ 0.4207580089569092
1206
+ ],
1207
+ "std": [
1208
+ 0.4379017651081085
1209
+ ],
1210
+ "q01": [
1211
+ 0.0
1212
+ ],
1213
+ "q99": [
1214
+ 1.0
1215
+ ]
1216
+ },
1217
+ "gripper_velocity": {
1218
+ "max": [
1219
+ 1.0
1220
+ ],
1221
+ "min": [
1222
+ -1.0
1223
+ ],
1224
+ "mean": [
1225
+ 0.15944316983222961
1226
+ ],
1227
+ "std": [
1228
+ 0.43238747119903564
1229
+ ],
1230
+ "q01": [
1231
+ -1.0
1232
+ ],
1233
+ "q99": [
1234
+ 1.0
1235
+ ]
1236
+ },
1237
+ "joint_position": {
1238
+ "max": [
1239
+ 2.751160144805908,
1240
+ 1.6689813137054443,
1241
+ 2.769918203353882,
1242
+ -0.1839631199836731,
1243
+ 2.781451463699341,
1244
+ 4.402013778686523,
1245
+ 2.90183162689209
1246
+ ],
1247
+ "min": [
1248
+ -2.781099557876587,
1249
+ -1.6407934427261353,
1250
+ -2.772181749343872,
1251
+ -2.9508564472198486,
1252
+ -2.7826988697052,
1253
+ 0.17761151492595673,
1254
+ -2.901715040206909
1255
+ ],
1256
+ "mean": [
1257
+ 0.014264222234487534,
1258
+ 0.2525460422039032,
1259
+ -0.015149222686886787,
1260
+ -2.0662245750427246,
1261
+ -0.03706594184041023,
1262
+ 2.540250778198242,
1263
+ 0.08673430979251862
1264
+ ],
1265
+ "std": [
1266
+ 0.32145625352859497,
1267
+ 0.48973745107650757,
1268
+ 0.28499534726142883,
1269
+ 0.48280277848243713,
1270
+ 0.5151209831237793,
1271
+ 0.48979467153549194,
1272
+ 0.7301526665687561
1273
+ ],
1274
+ "q01": [
1275
+ -0.9461821579933166,
1276
+ -0.8763976156711578,
1277
+ -0.9654564934968948,
1278
+ -2.775611162185669,
1279
+ -1.8216787135601042,
1280
+ 1.1957919597625732,
1281
+ -2.104389636516571
1282
+ ],
1283
+ "q99": [
1284
+ 1.0098848557472246,
1285
+ 1.4265497767925264,
1286
+ 0.8507625281810767,
1287
+ -0.43711905986070543,
1288
+ 1.6865044832229614,
1289
+ 3.484763395786289,
1290
+ 2.236884739398957
1291
+ ]
1292
+ },
1293
+ "joint_velocity": {
1294
+ "max": [
1295
+ 1.0,
1296
+ 1.0,
1297
+ 1.0,
1298
+ 1.0,
1299
+ 1.0,
1300
+ 1.0,
1301
+ 1.0
1302
+ ],
1303
+ "min": [
1304
+ -1.0,
1305
+ -1.0,
1306
+ -1.0,
1307
+ -1.0,
1308
+ -1.0,
1309
+ -1.0,
1310
+ -1.0
1311
+ ],
1312
+ "mean": [
1313
+ -0.004510974511504173,
1314
+ 0.014339218847453594,
1315
+ 0.002262236550450325,
1316
+ 0.030354879796504974,
1317
+ 0.0012928518699482083,
1318
+ -0.0006219720817171037,
1319
+ 0.0033850965555757284
1320
+ ],
1321
+ "std": [
1322
+ 0.14949378371238708,
1323
+ 0.28983473777770996,
1324
+ 0.14525704085826874,
1325
+ 0.2843790650367737,
1326
+ 0.20957471430301666,
1327
+ 0.23092615604400635,
1328
+ 0.24950972199440002
1329
+ ],
1330
+ "q01": [
1331
+ -0.45862655848264694,
1332
+ -0.8009099239110946,
1333
+ -0.4387282380461693,
1334
+ -0.9120300930738449,
1335
+ -0.6156043213605881,
1336
+ -0.6251490712165833,
1337
+ -0.7382495331764221
1338
+ ],
1339
+ "q99": [
1340
+ 0.4423619061708466,
1341
+ 0.7657521367073059,
1342
+ 0.44984547525644436,
1343
+ 0.7899917733669297,
1344
+ 0.6237488234043123,
1345
+ 0.6614942812919624,
1346
+ 0.7097782391309744
1347
+ ]
1348
+ }
1349
+ },
1350
+ "total_trajectory_length": 240,
1351
+ "num_trajectories": 76500
1352
+ },
1353
+ "modalities": {
1354
+ "video": {
1355
+ "exterior_image_1_left_pad_res256_freq15": {
1356
+ "resolution": [
1357
+ 256,
1358
+ 256
1359
+ ],
1360
+ "channels": 3,
1361
+ "fps": 15.0
1362
+ },
1363
+ "exterior_image_2_left_pad_res256_freq15": {
1364
+ "resolution": [
1365
+ 256,
1366
+ 256
1367
+ ],
1368
+ "channels": 3,
1369
+ "fps": 15.0
1370
+ },
1371
+ "wrist_image_left_pad_res256_freq15": {
1372
+ "resolution": [
1373
+ 256,
1374
+ 256
1375
+ ],
1376
+ "channels": 3,
1377
+ "fps": 15.0
1378
+ }
1379
+ },
1380
+ "state": {
1381
+ "eef_position": {
1382
+ "absolute": true,
1383
+ "rotation_type": null,
1384
+ "shape": [
1385
+ 3
1386
+ ],
1387
+ "continuous": true
1388
+ },
1389
+ "eef_rotation": {
1390
+ "absolute": true,
1391
+ "rotation_type": "euler_angles_rpy",
1392
+ "shape": [
1393
+ 3
1394
+ ],
1395
+ "continuous": true
1396
+ },
1397
+ "gripper_position": {
1398
+ "absolute": true,
1399
+ "rotation_type": null,
1400
+ "shape": [
1401
+ 1
1402
+ ],
1403
+ "continuous": true
1404
+ },
1405
+ "joint_position": {
1406
+ "absolute": true,
1407
+ "rotation_type": null,
1408
+ "shape": [
1409
+ 7
1410
+ ],
1411
+ "continuous": true
1412
+ }
1413
+ },
1414
+ "action": {
1415
+ "eef_position": {
1416
+ "absolute": true,
1417
+ "rotation_type": null,
1418
+ "shape": [
1419
+ 3
1420
+ ],
1421
+ "continuous": true
1422
+ },
1423
+ "eef_rotation": {
1424
+ "absolute": true,
1425
+ "rotation_type": "euler_angles_rpy",
1426
+ "shape": [
1427
+ 3
1428
+ ],
1429
+ "continuous": true
1430
+ },
1431
+ "eef_position_delta": {
1432
+ "absolute": false,
1433
+ "rotation_type": null,
1434
+ "shape": [
1435
+ 3
1436
+ ],
1437
+ "continuous": true
1438
+ },
1439
+ "eef_rotation_delta": {
1440
+ "absolute": false,
1441
+ "rotation_type": "euler_angles_rpy",
1442
+ "shape": [
1443
+ 3
1444
+ ],
1445
+ "continuous": true
1446
+ },
1447
+ "gripper_position": {
1448
+ "absolute": true,
1449
+ "rotation_type": null,
1450
+ "shape": [
1451
+ 1
1452
+ ],
1453
+ "continuous": true
1454
+ },
1455
+ "gripper_velocity": {
1456
+ "absolute": true,
1457
+ "rotation_type": null,
1458
+ "shape": [
1459
+ 1
1460
+ ],
1461
+ "continuous": true
1462
+ },
1463
+ "joint_position": {
1464
+ "absolute": true,
1465
+ "rotation_type": null,
1466
+ "shape": [
1467
+ 7
1468
+ ],
1469
+ "continuous": true
1470
+ },
1471
+ "joint_velocity": {
1472
+ "absolute": true,
1473
+ "rotation_type": null,
1474
+ "shape": [
1475
+ 7
1476
+ ],
1477
+ "continuous": true
1478
+ }
1479
+ },
1480
+ "annotation": {
1481
+ "language": [
1482
+ "language_instruction",
1483
+ "language_instruction_2",
1484
+ "language_instruction_3"
1485
+ ]
1486
+ }
1487
+ },
1488
+ "embodiment_tag": "oxe_droid"
1489
+ },
1490
+ "agibot": {
1491
+ "statistics": {
1492
+ "state": {
1493
+ "left_arm_joint_position": {
1494
+ "max": [
1495
+ 2.8758695125579834,
1496
+ 1.9940129518508911,
1497
+ 3.1370127201080322,
1498
+ 1.4812432527542114,
1499
+ 3.139925003051758,
1500
+ 1.7610539197921753,
1501
+ 3.1404590606689453
1502
+ ],
1503
+ "min": [
1504
+ -2.966796636581421,
1505
+ -1.9090405702590942,
1506
+ -2.994210958480835,
1507
+ -1.4968260526657104,
1508
+ -3.143338441848755,
1509
+ -1.7474952936172485,
1510
+ -3.140000104904175
1511
+ ],
1512
+ "mean": [
1513
+ -0.5825905799865723,
1514
+ 0.3457416296005249,
1515
+ 0.4395892322063446,
1516
+ -0.3457416296005249,
1517
+ 0.35607415437698364,
1518
+ 0.3457416296005249,
1519
+ 0.053289320319890976
1520
+ ],
1521
+ "std": [
1522
+ 0.7518633008003235,
1523
+ 0.5130977630615234,
1524
+ 0.6901939511299133,
1525
+ 0.5059813261032104,
1526
+ 0.4821229875087738,
1527
+ 0.5896104574203491,
1528
+ 0.7191146016120911
1529
+ ],
1530
+ "q01": [
1531
+ -2.364172456264496,
1532
+ -1.2625716614723206,
1533
+ -1.2713254690170288,
1534
+ -1.47836434841156,
1535
+ -0.5956208109855652,
1536
+ -0.004502099938690662,
1537
+ -2.323017120361328
1538
+ ],
1539
+ "q99": [
1540
+ 1.5946508646011353,
1541
+ 1.5619078862667077,
1542
+ 2.279411551952361,
1543
+ 0.0322999507188797,
1544
+ 1.8872510194778442,
1545
+ 1.7200289964675903,
1546
+ 1.6950677609443652
1547
+ ]
1548
+ },
1549
+ "right_arm_joint_position": {
1550
+ "max": [
1551
+ 3.140000104904175,
1552
+ 1.8658112287521362,
1553
+ 3.1351795196533203,
1554
+ 1.5650031566619873,
1555
+ 3.1516618728637695,
1556
+ 1.7467623949050903,
1557
+ 3.140075206756592
1558
+ ],
1559
+ "min": [
1560
+ -2.8576574325561523,
1561
+ -2.0087313652038574,
1562
+ -3.140000104904175,
1563
+ -1.4823949337005615,
1564
+ -3.1418726444244385,
1565
+ -1.7603733539581299,
1566
+ -3.140000104904175
1567
+ ],
1568
+ "mean": [
1569
+ 0.6057695746421814,
1570
+ -0.34574297070503235,
1571
+ -0.4090537130832672,
1572
+ 0.3457416296005249,
1573
+ -0.3463795781135559,
1574
+ -0.3457416296005249,
1575
+ 0.014872349798679352
1576
+ ],
1577
+ "std": [
1578
+ 0.770484983921051,
1579
+ 0.4891981780529022,
1580
+ 0.7010146975517273,
1581
+ 0.5284842252731323,
1582
+ 0.4678402543067932,
1583
+ 0.5902491211891174,
1584
+ 0.6703789234161377
1585
+ ],
1586
+ "q01": [
1587
+ -1.4295430171489716,
1588
+ -1.549558162689209,
1589
+ -2.12818288564682,
1590
+ -0.00530660115648061,
1591
+ -1.7935223531723024,
1592
+ -1.7253836393356323,
1593
+ -1.5423394405841828
1594
+ ],
1595
+ "q99": [
1596
+ 2.3075180053710938,
1597
+ 1.1316230213642076,
1598
+ 1.4279094922542566,
1599
+ 1.4798182249069214,
1600
+ 0.6721548426151269,
1601
+ 0.08709267988800451,
1602
+ 2.257192373275757
1603
+ ]
1604
+ },
1605
+ "left_effector_position": {
1606
+ "max": [
1607
+ 124.75714111328125
1608
+ ],
1609
+ "min": [
1610
+ 32.57777786254883
1611
+ ],
1612
+ "mean": [
1613
+ 22.127464294433594
1614
+ ],
1615
+ "std": [
1616
+ 53.21950149536133
1617
+ ],
1618
+ "q01": [
1619
+ 34.53333282470703
1620
+ ],
1621
+ "q99": [
1622
+ 122.0999984741211
1623
+ ]
1624
+ },
1625
+ "right_effector_position": {
1626
+ "max": [
1627
+ 124.16011810302734
1628
+ ],
1629
+ "min": [
1630
+ 33.599998474121094
1631
+ ],
1632
+ "mean": [
1633
+ 22.127464294433594
1634
+ ],
1635
+ "std": [
1636
+ 53.21950149536133
1637
+ ],
1638
+ "q01": [
1639
+ 34.66666793823242
1640
+ ],
1641
+ "q99": [
1642
+ 122.1443862915039
1643
+ ]
1644
+ },
1645
+ "head_position": {
1646
+ "max": [
1647
+ 0.4363349676132202,
1648
+ 0.5238837003707886
1649
+ ],
1650
+ "min": [
1651
+ -0.2792723774909973,
1652
+ -0.14546717703342438
1653
+ ],
1654
+ "mean": [
1655
+ -0.005973346531391144,
1656
+ 0.17287081480026245
1657
+ ],
1658
+ "std": [
1659
+ 0.03377634286880493,
1660
+ 0.14699949324131012
1661
+ ],
1662
+ "q01": [
1663
+ -0.1745089590549469,
1664
+ 0.1396242082118988
1665
+ ],
1666
+ "q99": [
1667
+ 0.087269127368927,
1668
+ 0.5236093997955322
1669
+ ]
1670
+ },
1671
+ "waist_position": {
1672
+ "max": [
1673
+ 0.8028613924980164,
1674
+ 0.6108654737472534
1675
+ ],
1676
+ "min": [
1677
+ -0.0001549959706608206,
1678
+ -1.6639254681649618e-05
1679
+ ],
1680
+ "mean": [
1681
+ 0.17287081480026245,
1682
+ 0.08938886225223541
1683
+ ],
1684
+ "std": [
1685
+ 0.20994052290916443,
1686
+ 0.15571819245815277
1687
+ ],
1688
+ "q01": [
1689
+ 7.989482924131153e-07,
1690
+ 0.0
1691
+ ],
1692
+ "q99": [
1693
+ 0.6806802153587341,
1694
+ 0.45334962010383606
1695
+ ]
1696
+ }
1697
+ },
1698
+ "action": {
1699
+ "left_arm_joint_position": {
1700
+ "max": [
1701
+ 2.8758695125579834,
1702
+ 1.9940129518508911,
1703
+ 3.1370127201080322,
1704
+ 1.4812432527542114,
1705
+ 3.139925003051758,
1706
+ 1.7610539197921753,
1707
+ 3.1404590606689453
1708
+ ],
1709
+ "min": [
1710
+ -2.966796636581421,
1711
+ -1.9090405702590942,
1712
+ -2.994210958480835,
1713
+ -1.4968260526657104,
1714
+ -3.143338441848755,
1715
+ -1.7474952936172485,
1716
+ -3.140000104904175
1717
+ ],
1718
+ "mean": [
1719
+ -0.5825905799865723,
1720
+ 0.3457416296005249,
1721
+ 0.4395892322063446,
1722
+ -0.3457416296005249,
1723
+ 0.35607415437698364,
1724
+ 0.3457416296005249,
1725
+ 0.053289320319890976
1726
+ ],
1727
+ "std": [
1728
+ 0.7518633008003235,
1729
+ 0.5130977630615234,
1730
+ 0.6901939511299133,
1731
+ 0.5059813261032104,
1732
+ 0.4821229875087738,
1733
+ 0.5896104574203491,
1734
+ 0.7191146016120911
1735
+ ],
1736
+ "q01": [
1737
+ -2.364172456264496,
1738
+ -1.2625716614723206,
1739
+ -1.2713254690170288,
1740
+ -1.47836434841156,
1741
+ -0.5956208109855652,
1742
+ -0.004502099938690662,
1743
+ -2.323017120361328
1744
+ ],
1745
+ "q99": [
1746
+ 1.5946508646011353,
1747
+ 1.5619078862667077,
1748
+ 2.279411551952361,
1749
+ 0.0322999507188797,
1750
+ 1.8872510194778442,
1751
+ 1.7200289964675903,
1752
+ 1.6950677609443652
1753
+ ]
1754
+ },
1755
+ "right_arm_joint_position": {
1756
+ "max": [
1757
+ 3.140000104904175,
1758
+ 1.8658112287521362,
1759
+ 3.1351795196533203,
1760
+ 1.5650031566619873,
1761
+ 3.1516618728637695,
1762
+ 1.7467623949050903,
1763
+ 3.140075206756592
1764
+ ],
1765
+ "min": [
1766
+ -2.8576574325561523,
1767
+ -2.0087313652038574,
1768
+ -3.140000104904175,
1769
+ -1.4823949337005615,
1770
+ -3.1418726444244385,
1771
+ -1.7603733539581299,
1772
+ -3.140000104904175
1773
+ ],
1774
+ "mean": [
1775
+ 0.6057695746421814,
1776
+ -0.34574297070503235,
1777
+ -0.4090537130832672,
1778
+ 0.3457416296005249,
1779
+ -0.3463795781135559,
1780
+ -0.3457416296005249,
1781
+ 0.014872349798679352
1782
+ ],
1783
+ "std": [
1784
+ 0.770484983921051,
1785
+ 0.4891981780529022,
1786
+ 0.7010146975517273,
1787
+ 0.5284842252731323,
1788
+ 0.4678402543067932,
1789
+ 0.5902491211891174,
1790
+ 0.6703789234161377
1791
+ ],
1792
+ "q01": [
1793
+ -1.4295430171489716,
1794
+ -1.549558162689209,
1795
+ -2.12818288564682,
1796
+ -0.00530660115648061,
1797
+ -1.7935223531723024,
1798
+ -1.7253836393356323,
1799
+ -1.5423394405841828
1800
+ ],
1801
+ "q99": [
1802
+ 2.3075180053710938,
1803
+ 1.1316230213642076,
1804
+ 1.4279094922542566,
1805
+ 1.4798182249069214,
1806
+ 0.6721548426151269,
1807
+ 0.08709267988800451,
1808
+ 2.257192373275757
1809
+ ]
1810
+ },
1811
+ "left_effector_position": {
1812
+ "max": [
1813
+ 1.0
1814
+ ],
1815
+ "min": [
1816
+ 0.0
1817
+ ],
1818
+ "mean": [
1819
+ 0.17287081480026245
1820
+ ],
1821
+ "std": [
1822
+ 0.4157773554325104
1823
+ ],
1824
+ "q01": [
1825
+ 0.0
1826
+ ],
1827
+ "q99": [
1828
+ 1.0
1829
+ ]
1830
+ },
1831
+ "right_effector_position": {
1832
+ "max": [
1833
+ 1.0
1834
+ ],
1835
+ "min": [
1836
+ 0.0
1837
+ ],
1838
+ "mean": [
1839
+ 0.17287081480026245
1840
+ ],
1841
+ "std": [
1842
+ 0.4157773554325104
1843
+ ],
1844
+ "q01": [
1845
+ 0.0
1846
+ ],
1847
+ "q99": [
1848
+ 1.0
1849
+ ]
1850
+ },
1851
+ "head_position": {
1852
+ "max": [
1853
+ 0.4363349676132202,
1854
+ 0.5238837003707886
1855
+ ],
1856
+ "min": [
1857
+ -0.2792723774909973,
1858
+ -0.14546717703342438
1859
+ ],
1860
+ "mean": [
1861
+ -0.005973346531391144,
1862
+ 0.17287081480026245
1863
+ ],
1864
+ "std": [
1865
+ 0.03377634286880493,
1866
+ 0.14699949324131012
1867
+ ],
1868
+ "q01": [
1869
+ -0.1745089590549469,
1870
+ 0.1396242082118988
1871
+ ],
1872
+ "q99": [
1873
+ 0.087269127368927,
1874
+ 0.5236093997955322
1875
+ ]
1876
+ },
1877
+ "waist_position": {
1878
+ "max": [
1879
+ 0.8028613924980164,
1880
+ 0.6108654737472534
1881
+ ],
1882
+ "min": [
1883
+ -0.0001549959706608206,
1884
+ -1.6639254681649618e-05
1885
+ ],
1886
+ "mean": [
1887
+ 0.17287081480026245,
1888
+ 0.08938886225223541
1889
+ ],
1890
+ "std": [
1891
+ 0.20994052290916443,
1892
+ 0.15571819245815277
1893
+ ],
1894
+ "q01": [
1895
+ 7.989482924131153e-07,
1896
+ 0.0
1897
+ ],
1898
+ "q99": [
1899
+ 0.6806802153587341,
1900
+ 0.45334962010383606
1901
+ ]
1902
+ },
1903
+ "robot_velocity": {
1904
+ "max": [
1905
+ 1.600000023841858,
1906
+ 1.0
1907
+ ],
1908
+ "min": [
1909
+ -1.600000023841858,
1910
+ -1.0
1911
+ ],
1912
+ "mean": [
1913
+ 0.004304705653339624,
1914
+ -0.00043726855074055493
1915
+ ],
1916
+ "std": [
1917
+ 0.1554863303899765,
1918
+ 0.07045048475265503
1919
+ ],
1920
+ "q01": [
1921
+ 0.0,
1922
+ 0.0
1923
+ ],
1924
+ "q99": [
1925
+ 0.0,
1926
+ 0.0
1927
+ ]
1928
+ }
1929
+ },
1930
+ "total_trajectory_length": 97050602,
1931
+ "num_trajectories": 189774
1932
+ },
1933
+ "modalities": {
1934
+ "video": {
1935
+ "top_head_pad_res256_freq10": {
1936
+ "resolution": [
1937
+ 256,
1938
+ 256
1939
+ ],
1940
+ "channels": 3,
1941
+ "fps": 30.0
1942
+ },
1943
+ "hand_left_pad_res256_freq10": {
1944
+ "resolution": [
1945
+ 256,
1946
+ 256
1947
+ ],
1948
+ "channels": 3,
1949
+ "fps": 30.0
1950
+ },
1951
+ "hand_right_pad_res256_freq10": {
1952
+ "resolution": [
1953
+ 256,
1954
+ 256
1955
+ ],
1956
+ "channels": 3,
1957
+ "fps": 30.0
1958
+ }
1959
+ },
1960
+ "state": {
1961
+ "left_arm_joint_position": {
1962
+ "absolute": true,
1963
+ "rotation_type": null,
1964
+ "shape": [
1965
+ 7
1966
+ ],
1967
+ "continuous": true
1968
+ },
1969
+ "right_arm_joint_position": {
1970
+ "absolute": true,
1971
+ "rotation_type": null,
1972
+ "shape": [
1973
+ 7
1974
+ ],
1975
+ "continuous": true
1976
+ },
1977
+ "left_effector_position": {
1978
+ "absolute": true,
1979
+ "rotation_type": null,
1980
+ "shape": [
1981
+ 1
1982
+ ],
1983
+ "continuous": true
1984
+ },
1985
+ "right_effector_position": {
1986
+ "absolute": true,
1987
+ "rotation_type": null,
1988
+ "shape": [
1989
+ 1
1990
+ ],
1991
+ "continuous": true
1992
+ },
1993
+ "head_position": {
1994
+ "absolute": true,
1995
+ "rotation_type": null,
1996
+ "shape": [
1997
+ 2
1998
+ ],
1999
+ "continuous": true
2000
+ },
2001
+ "waist_position": {
2002
+ "absolute": true,
2003
+ "rotation_type": null,
2004
+ "shape": [
2005
+ 2
2006
+ ],
2007
+ "continuous": true
2008
+ }
2009
+ },
2010
+ "action": {
2011
+ "left_arm_joint_position": {
2012
+ "absolute": true,
2013
+ "rotation_type": null,
2014
+ "shape": [
2015
+ 7
2016
+ ],
2017
+ "continuous": true
2018
+ },
2019
+ "right_arm_joint_position": {
2020
+ "absolute": true,
2021
+ "rotation_type": null,
2022
+ "shape": [
2023
+ 7
2024
+ ],
2025
+ "continuous": true
2026
+ },
2027
+ "left_effector_position": {
2028
+ "absolute": true,
2029
+ "rotation_type": null,
2030
+ "shape": [
2031
+ 1
2032
+ ],
2033
+ "continuous": true
2034
+ },
2035
+ "right_effector_position": {
2036
+ "absolute": true,
2037
+ "rotation_type": null,
2038
+ "shape": [
2039
+ 1
2040
+ ],
2041
+ "continuous": true
2042
+ },
2043
+ "head_position": {
2044
+ "absolute": true,
2045
+ "rotation_type": null,
2046
+ "shape": [
2047
+ 2
2048
+ ],
2049
+ "continuous": true
2050
+ },
2051
+ "waist_position": {
2052
+ "absolute": true,
2053
+ "rotation_type": null,
2054
+ "shape": [
2055
+ 2
2056
+ ],
2057
+ "continuous": true
2058
+ },
2059
+ "robot_velocity": {
2060
+ "absolute": true,
2061
+ "rotation_type": null,
2062
+ "shape": [
2063
+ 2
2064
+ ],
2065
+ "continuous": true
2066
+ }
2067
+ },
2068
+ "annotation": {
2069
+ "language": [
2070
+ "action_text"
2071
+ ]
2072
+ }
2073
+ },
2074
+ "embodiment_tag": "agibot"
2075
+ }
2076
+ }
latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step152000
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a00a4cee4b1591c06440ec1167b3b8d762957d50fa00befbe98a71663e0e4586
3
+ size 4973058384
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83e33f0ac2e0745241bfdaa6f5ad8dec684d6de5146c7efe46a789143dab0d17
3
+ size 2414539196
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a1a9a747c0d38ca56856bfd2e8f9664c75550b9e1302e9340a34fdcbcc6400
3
+ size 73200752
zero_to_fp32.py ADDED
@@ -0,0 +1,604 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright (c) Microsoft Corporation.
4
+ # SPDX-License-Identifier: Apache-2.0
5
+
6
+ # DeepSpeed Team
7
+
8
+ # This script extracts consolidated fp32 weights from ZeRO stage 1, 2 and 3 DeepSpeed checkpoints. It gets
9
+ # copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
10
+ # the future. Once extracted, the weights don't require DeepSpeed and can be used in any
11
+ # application.
12
+ #
13
+ # example: python zero_to_fp32.py . pytorch_model.bin
14
+
15
+ import argparse
16
+ import torch
17
+ import glob
18
+ import math
19
+ import os
20
+ import re
21
+ from collections import OrderedDict
22
+ from dataclasses import dataclass
23
+
24
+ # while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
25
+ # DeepSpeed data structures it has to be available in the current python environment.
26
+ from deepspeed.utils import logger
27
+ from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
28
+ FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
29
+ FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
30
+
31
+
32
@dataclass
class zero_model_state:
    # Plain record describing one parsed ``*_model_states.pt`` file.
    #
    # The annotations below are real types; the previous ``dict()`` spelling stored an
    # empty dict *instance* as the annotation, which type checkers and
    # ``dataclasses.fields()`` consumers cannot interpret.

    # buffer name -> tensor converted with ``.float()``
    buffers: dict
    # value stored under PARAM_SHAPES in the checkpoint
    # NOTE(review): other code in this file iterates it as a sequence of name->shape
    # mappings - confirm whether ``list`` would be the more accurate annotation.
    param_shapes: dict
    # list of ``[key, value]`` pairs taken from the checkpoint's "shared_params" mapping
    shared_params: list
    # value stored under DS_VERSION, or None when the key is absent
    # NOTE(review): annotated ``int`` by the original author - confirm the actual type.
    ds_version: int
    # value stored under FROZEN_PARAM_SHAPES, or None when the key is absent
    frozen_param_shapes: dict
    # value stored under FROZEN_PARAM_FRAGMENTS, or None when the key is absent
    frozen_param_fragments: dict
40
+
41
+
42
+ debug = 0
43
+
44
+ # load to cpu
45
+ device = torch.device('cpu')
46
+
47
+
48
def atoi(text):
    """Return ``int(text)`` when ``text`` consists only of digits, otherwise ``text`` unchanged."""
    if text.isdigit():
        return int(text)
    return text
50
+
51
+
52
def natural_keys(text):
    '''
    Sort key producing "human" ordering: ``alist.sort(key=natural_keys)``.

    The text is split on runs of digits (the runs are kept) and every piece is passed
    through ``atoi``, so digit runs are compared as integers rather than as strings.
    Reference: http://nedbatchelder.com/blog/200712/human_sorting.html
    (see Toothy's implementation in the comments)
    '''
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))
59
+
60
+
61
def get_model_state_file(checkpoint_dir, zero_stage):
    """Return the path of the rank-0 model-states file inside ``checkpoint_dir``.

    Args:
        checkpoint_dir: directory expected to contain the model-states file.
        zero_stage: ZeRO stage of the checkpoint; it selects which file name is looked up.

    Returns:
        The path of the model-states file, which is verified to exist.

    Raises:
        FileNotFoundError: if ``checkpoint_dir`` is not a directory, or the expected
            file is not present in it.
        ValueError: if ``zero_stage`` is neither ``<= 2`` nor ``3``.
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # Without this branch ``file`` stayed unbound and the check below failed with an
        # unrelated UnboundLocalError; report the real problem instead.
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file
75
+
76
+
77
def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """Return the files in ``checkpoint_dir`` matching ``glob_pattern``, in natural order.

    Raises:
        FileNotFoundError: if nothing matches the pattern.
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    matches = glob.glob(os.path.join(checkpoint_dir, glob_pattern))
    matches.sort(key=natural_keys)

    if not matches:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return matches
85
+
86
+
87
def get_optim_files(checkpoint_dir):
    """Return the naturally sorted ``*_optim_states.pt`` files found in ``checkpoint_dir``."""
    optim_pattern = "*_optim_states.pt"
    return get_checkpoint_files(checkpoint_dir, optim_pattern)
89
+
90
+
91
def get_model_state_files(checkpoint_dir):
    """Return the naturally sorted ``*_model_states.pt`` files found in ``checkpoint_dir``."""
    model_pattern = "*_model_states.pt"
    return get_checkpoint_files(checkpoint_dir, model_pattern)
93
+
94
+
95
def parse_model_states(files):
    """Load every model-states file and summarise each one as a ``zero_model_state``.

    Args:
        files: iterable of paths to ``*_model_states.pt`` files.

    Returns:
        A list with one ``zero_model_state`` per input file, in input order.

    Raises:
        ValueError: if a loaded file has no ``BUFFER_NAMES`` entry.
    """
    parsed_states = []
    for file in files:
        # The checkpoints are pickled, so only feed this trusted files.
        state_dict = torch.load(file, map_location=device)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # keep only the buffers, converting them to fp32 in case they were saved in fp16
        buffers = {}
        for key, tensor in state_dict["module"].items():
            if key in buffer_names:
                buffers[key] = tensor.float()
        param_shapes = state_dict[PARAM_SHAPES]

        # names of the parameters listed in param_shapes
        param_names = [name for group in param_shapes for name in group.keys()]

        # frozen parameters are optional; add their names when present
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if frozen_param_shapes is not None:
            if debug:
                print(f"Found frozen_param_shapes: {frozen_param_shapes}")
            param_names += list(frozen_param_shapes.keys())

        # shared params become a list of [key, value] pairs
        shared_params = [list(pair) for pair in state_dict["shared_params"].items()]

        ds_version = state_dict.get(DS_VERSION, None)
        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)

        parsed_states.append(
            zero_model_state(buffers=buffers,
                             param_shapes=param_shapes,
                             shared_params=shared_params,
                             ds_version=ds_version,
                             frozen_param_shapes=frozen_param_shapes,
                             frozen_param_fragments=frozen_param_fragments))

    return parsed_states
139
+
140
+
141
def parse_optim_states(files, ds_checkpoint_dir):
    """Load the optimizer-state files and pull out the fp32 weight groups.

    Args:
        files: paths of the ``*_optim_states.pt`` files, one per partition.
        ds_checkpoint_dir: checkpoint directory, used only in the error message.

    Returns:
        A ``(zero_stage, world_size, fp32_flat_groups)`` tuple. ``fp32_flat_groups`` has
        one entry per file: for stage <= 2 the value stored under
        ``SINGLE_PARTITION_OF_FP32_GROUPS``, for stage 3 the tensors stored under
        ``FP32_FLAT_GROUPS`` concatenated along dimension 0.

    Raises:
        ValueError: if the first file has no ``ZERO_STAGE`` entry, if the number of
            files differs from the recorded partition count, or if the stage is unknown.
    """
    total_files = len(files)
    state_dicts = []
    for optim_path in files:
        loaded = torch.load(optim_path, map_location=device)
        # drop the potentially huge optimizer states right away - only the fp32 master
        # weights are needed; the default also covers the case where another helper
        # script already removed them
        loaded["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(loaded)

    first_optim = state_dicts[0][OPTIMIZER_STATE_DICT]
    if ZERO_STAGE not in first_optim:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = first_optim[ZERO_STAGE]
    world_size = first_optim[PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.
    if type(world_size) is list:
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are stored under a different key in each stage
    if zero_stage <= 2:
        fp32_flat_groups = [sd[OPTIMIZER_STATE_DICT][SINGLE_PARTITION_OF_FP32_GROUPS] for sd in state_dicts]
    elif zero_stage == 3:
        # if there is more than one param group, there will be multiple flattened tensors - one
        # flattened tensor per group - for simplicity merge them into a single tensor
        #
        # XXX: could make the script more memory efficient for when there are multiple groups - it
        # will require matching the sub-lists of param_shapes for each param group flattened tensor
        fp32_flat_groups = [torch.cat(sd[OPTIMIZER_STATE_DICT][FP32_FLAT_GROUPS], 0) for sd in state_dicts]
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    return zero_stage, world_size, fp32_flat_groups
192
+
193
+
194
def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
    """
    Rebuild the fp32 state_dict from a DeepSpeed checkpoint folder.

    Args:
        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
        - ``exclude_frozen_parameters``: passed through unchanged to the stage-specific merge function

    Returns the result of the ZeRO-2 merge for stage <= 2 and of the ZeRO-3 merge for stage 3.
    """
    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")

    optim_files = get_optim_files(ds_checkpoint_dir)
    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")

    zero_model_states = parse_model_states(get_model_state_files(ds_checkpoint_dir))
    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')

    # choose the stage-specific merge routine, then call it once
    if zero_stage <= 2:
        merge_fn = _get_fp32_state_dict_from_zero2_checkpoint
    elif zero_stage == 3:
        merge_fn = _get_fp32_state_dict_from_zero3_checkpoint
    else:
        return None

    return merge_fn(world_size, fp32_flat_groups, zero_model_states, exclude_frozen_parameters)
219
+
220
+
221
def _zero2_merge_frozen_params(state_dict, zero_model_states):
    """Copy the frozen-parameter fragments of the first model state into ``state_dict``.

    Does nothing when the first model state has no frozen parameter shapes (``None`` or
    empty). Otherwise every name in ``frozen_param_shapes`` is assigned the matching entry
    of ``frozen_param_fragments``, and element counts are printed as a progress report.

    Args:
        state_dict: mapping updated in place.
        zero_model_states: parsed model states; only index 0 is read.
    """
    rank0_state = zero_model_states[0]
    frozen_param_shapes = rank0_state.frozen_param_shapes
    if frozen_param_shapes is None or len(frozen_param_shapes) == 0:
        return
    frozen_param_fragments = rank0_state.frozen_param_fragments

    if debug:
        num_elem = sum(shape.numel() for shape in frozen_param_shapes.values())
        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

    wanted_params = len(frozen_param_shapes)
    wanted_numel = sum(shape.numel() for shape in frozen_param_shapes.values())
    avail_numel = sum(fragment.numel() for fragment in frozen_param_fragments.values())
    print(f'Frozen params: Have {avail_numel} numels to process.')
    print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name in frozen_param_shapes:
        shape = frozen_param_shapes[name]
        unpartitioned_numel = shape.numel()
        total_params += 1
        total_numel += unpartitioned_numel

        # the stored fragment is used as-is
        state_dict[name] = frozen_param_fragments[name]

        if debug:
            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
251
+
252
+
253
+ def _has_callable(obj, fn):
254
+ attr = getattr(obj, fn, None)
255
+ return callable(attr)
256
+
257
+
258
def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """Rebuild the trainable parameters of a ZeRO-2 checkpoint and store them in ``state_dict``.

    Args:
        state_dict: mapping that receives one ``name -> tensor`` entry per trainable parameter.
        world_size: number of ranks; only used for the debug loop and the alignment check.
        fp32_flat_groups: one entry per rank; each entry is indexable by param-group index and
            yields that rank's flat fp32 partition for the group.
        zero_model_states: per-rank model states; only ``zero_model_states[0].param_shapes`` is read.
            It is iterated as a sequence of ``{name: shape}`` mappings, one per param group.

    Raises:
        ValueError: if, after rounding both up to a multiple of ``2 * world_size``, the number of
            consumed elements of a group differs from the number of available elements.
    """
    param_shapes = zero_model_states[0].param_shapes

    def _numel_of(shape):
        # A shape is either an object exposing ``numel()`` or a plain sequence of ints.
        return shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)

    # Reconstruction protocol:
    #
    # 1. for every param group, concatenate the partitions of all ranks (in rank order) into one
    #    flat vector
    # 2. walk the group's ``{name: shape}`` mapping in order, slicing consecutive ranges off that
    #    flat vector and viewing each slice with the parameter's shape

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_single_partition_of_fp32_groups = [
        torch.cat([rank_groups[group_idx] for rank_groups in fp32_flat_groups], 0)
        for group_idx in range(num_param_groups)
    ]

    if debug:
        avail_numel = sum(vector.numel() for vector in merged_single_partition_of_fp32_groups)
        wanted_params = sum(len(shapes) for shapes in param_shapes)
        # Use the same numel helper as the reconstruction loop so that plain-sequence shapes do not
        # crash the debug output.
        wanted_numel = sum(sum(_numel_of(shape) for shape in shapes.values()) for shapes in param_shapes)
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
    # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
    # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
    # live optimizer object, so we are checking that the numbers are within the right range
    align_to = 2 * world_size

    def zero2_align(x):
        return align_to * math.ceil(x / align_to)

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
        offset = 0
        avail_numel = full_single_fp32_vector.numel()
        for name, shape in shapes.items():
            unpartitioned_numel = _numel_of(shape)
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
329
+
330
+
331
def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    """Assemble the consolidated state dict for a ZeRO-2 checkpoint.

    The result is an ``OrderedDict`` filled, in this order, with rank 0's buffers, the frozen
    parameters (skipped when ``exclude_frozen_parameters`` is true), the trainable parameters,
    and finally the shared-parameter aliases recorded in rank 0's ``shared_params``.
    """
    rank0_state = zero_model_states[0]
    state_dict = OrderedDict()

    # Buffers are taken from rank 0 only.
    rank0_buffers = rank0_state.buffers
    state_dict.update(rank0_buffers)
    if debug:
        print(f"added {len(rank0_buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # Recover shared parameters: pair[0] becomes an alias of pair[1] when the latter was rebuilt.
    for pair in rank0_state.shared_params:
        source_name = pair[1]
        if source_name in state_dict:
            state_dict[pair[0]] = state_dict[source_name]

    return state_dict
352
+
353
+
354
def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """Compute how a parameter of ``unpartitioned_numel`` elements splits across ``world_size`` ranks.

    Args:
        unpartitioned_numel: total element count of the full parameter.
        world_size: number of ranks the parameter is split across.

    Returns:
        A ``(partitioned_numel, padding_numel)`` tuple, where ``partitioned_numel`` is
        ``unpartitioned_numel / world_size`` rounded up and ``padding_numel`` is the number of
        elements needed to round ``unpartitioned_numel`` up to a multiple of ``world_size``.

    Raises:
        ZeroDivisionError: if ``world_size`` is 0.
    """
    # Pure integer arithmetic: float division followed by ceil() is inexact above 2**53.
    full_chunks, remainder = divmod(unpartitioned_numel, world_size)
    if remainder:
        return full_chunks + 1, world_size - remainder
    return full_chunks, 0
359
+
360
+
361
def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
    """Rebuild the frozen parameters of a ZeRO-3 checkpoint and store them in ``state_dict``.

    Shapes are read from rank 0's ``frozen_param_shapes``. For each name, the fragments held by
    every entry of ``zero_model_states`` are concatenated in order, trimmed to the parameter's
    element count and viewed with its shape. Returns without doing anything when rank 0 has no
    frozen parameter shapes.
    """
    rank0_state = zero_model_states[0]
    frozen_param_shapes = rank0_state.frozen_param_shapes
    if frozen_param_shapes is None or len(frozen_param_shapes) == 0:
        return

    if debug:
        for rank in range(world_size):
            rank_fragments = zero_model_states[rank].frozen_param_fragments
            num_elem = sum(fragment.numel() for fragment in rank_fragments.values())
            print(f'rank {rank}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

    wanted_params = len(frozen_param_shapes)
    wanted_numel = sum(shape.numel() for shape in frozen_param_shapes.values())
    # Availability is estimated from rank 0's fragments multiplied by the number of ranks.
    rank0_numel = sum([fragment.numel() for fragment in rank0_state.frozen_param_fragments.values()])
    avail_numel = rank0_numel * world_size
    print(f'Frozen params: Have {avail_numel} numels to process.')
    print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for total_params, (name, shape) in enumerate(frozen_param_shapes.items(), start=1):
        full_numel = shape.numel()
        total_numel += full_numel

        # Concatenate every rank's fragment, then drop whatever lies past the real element count.
        fragments = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
        state_dict[name] = torch.cat(fragments, 0).narrow(0, 0, full_numel).view(shape)

        per_rank_numel, per_rank_padding = zero3_partitioned_param_info(full_numel, world_size)

        if debug:
            print(
                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={per_rank_numel} partitioned_padding_numel={per_rank_padding}"
            )

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
395
+
396
+
397
def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """Rebuild the trainable parameters of a ZeRO-3 checkpoint and store them in ``state_dict``.

    Reconstruction protocol: for ZeRO-3 the partitions are zipped together at the boundary of each
    param. For every parameter, the same ``[offset, offset + partitioned_numel)`` range is taken
    from each rank's flat tensor, the ranges are concatenated in rank order, and the result is
    trimmed to the parameter's element count (dropping padding, if any) and viewed with its shape.

    Raises:
        ValueError: if the consumed element count does not equal
            ``fp32_flat_groups[0].numel() * world_size``.
    """
    shape_groups = zero_model_states[0].param_shapes
    avail_numel = fp32_flat_groups[0].numel() * world_size

    # Merge the list of dicts into a single dict, preserving order.
    param_shapes = {}
    for group_shapes in shape_groups:
        for param_name, param_shape in group_shapes.items():
            param_shapes[param_name] = param_shape

    if debug:
        for rank in range(world_size):
            print(f"{FP32_FLAT_GROUPS}[{rank}].shape={fp32_flat_groups[rank].shape}")

    wanted_params = len(param_shapes)
    wanted_numel = sum(param_shape.numel() for param_shape in param_shapes.values())
    # not asserting if there is a mismatch due to possible padding
    print(f"Trainable params: Have {avail_numel} numels to process.")
    print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    cursor = 0
    total_numel = 0
    total_params = 0
    for name, shape in param_shapes.items():
        full_numel = shape.numel()
        total_numel += full_numel
        total_params += 1

        per_rank_numel, per_rank_padding = zero3_partitioned_param_info(full_numel, world_size)

        if debug:
            print(
                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={per_rank_numel} partitioned_padding_numel={per_rank_padding}"
            )

        # XXX: memory usage doubles here
        rank_slices = tuple(fp32_flat_groups[rank].narrow(0, cursor, per_rank_numel) for rank in range(world_size))
        state_dict[name] = torch.cat(rank_slices, 0).narrow(0, 0, full_numel).view(shape)
        cursor += per_rank_numel

    # The cursor advanced in per-rank units; scale it to the size of all ranks combined.
    offset = cursor * world_size

    # Sanity check
    if offset != avail_numel:
        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
449
+
450
+
451
def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    """Assemble the consolidated state dict for a ZeRO-3 checkpoint.

    The result is an ``OrderedDict`` filled, in this order, with rank 0's buffers, the frozen
    parameters (skipped when ``exclude_frozen_parameters`` is true), the trainable parameters,
    and finally the shared-parameter aliases recorded in rank 0's ``shared_params``.
    """
    rank0_state = zero_model_states[0]
    state_dict = OrderedDict()

    # Buffers are taken from rank 0 only.
    rank0_buffers = rank0_state.buffers
    state_dict.update(rank0_buffers)
    if debug:
        print(f"added {len(rank0_buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)

    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # Recover shared parameters: pair[0] becomes an alias of pair[1] when the latter was rebuilt.
    for pair in rank0_state.shared_params:
        source_name = pair[1]
        if source_name in state_dict:
            state_dict[pair[0]] = state_dict[source_name]

    return state_dict
472
+
473
+
474
def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False):
    """
    Convert a ZeRO 2 or 3 checkpoint into a single consolidated fp32 state_dict that can be loaded
    with ``load_state_dict()`` and used for training without DeepSpeed, or shared with others, for
    example via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for the checkpoint, e.g. ``global_step14``.
          When omitted, the tag is read from the file named ``latest`` inside ``checkpoint_dir``.
        - ``exclude_frozen_parameters``: exclude frozen parameters

    Returns:
        - pytorch ``state_dict``

    Raises:
        - ``ValueError``: ``tag`` was not given and ``checkpoint_dir`` has no ``latest`` file
        - ``FileNotFoundError``: the ``checkpoint_dir/tag`` directory does not exist

    Note: this approach may not work if your application doesn't have sufficient free CPU memory.
    In that case use the offline approach with the ``zero_to_fp32.py`` script that is saved with
    the checkpoint.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application, i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.

    """
    if tag is None:
        # Fall back to the tag recorded in the 'latest' marker file.
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if not os.path.isfile(latest_path):
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")
        with open(latest_path, 'r') as fd:
            tag = fd.read().strip()

    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)
    if os.path.isdir(ds_checkpoint_dir):
        return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)

    raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
522
+
523
+
524
def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None, exclude_frozen_parameters=False):
    """
    Convert a ZeRO 2 or 3 checkpoint into a single consolidated fp32 ``state_dict`` file that can be
    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder (the one that contains the tag-folder, like ``global_step14``)
        - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin)
        - ``tag``: checkpoint tag used as a unique identifier for the checkpoint, e.g. ``global_step14``. When omitted, the tag is read from the file named ``latest`` in the checkpoint folder.
        - ``exclude_frozen_parameters``: exclude frozen parameters
    """
    consolidated = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir,
                                                            tag=tag,
                                                            exclude_frozen_parameters=exclude_frozen_parameters)
    print(f"Saving fp32 state dict to {output_file}")
    torch.save(consolidated, output_file)
539
+
540
+
541
def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
    """
    1. Put the provided model on cpu
    2. Convert the ZeRO 2 or 3 checkpoint into a single consolidated fp32 ``state_dict``
    3. Load it into the provided model (with ``strict=False``)

    Args:
        - ``model``: the model object to update
        - ``checkpoint_dir``: path to the desired checkpoint folder (the one that contains the tag-folder, like ``global_step14``)
        - ``tag``: checkpoint tag used as a unique identifier for the checkpoint, e.g. ``global_step14``. When omitted, the tag is read from the file named ``latest`` in the checkpoint folder.

    Returns:
        - ``model``: modified model

    Make sure you have plenty of CPU memory available before you call this function. If you don't
    have enough, use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
    conveniently placed for you in the checkpoint folder.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
        # submit to model hub or save the model to share with others

    Note that once this has run, the ``model`` will no longer be usable in the deepspeed context
    of the same application, i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    """
    logger.info("Extracting fp32 weights")
    fp32_weights = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    logger.info("Overwriting model with fp32 weights")
    cpu_model = model.cpu()
    cpu_model.load_state_dict(fp32_weights, strict=False)
    return cpu_model
578
+
579
+
580
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, set the module-level debug flag and run the
    # checkpoint-to-fp32 conversion.
    parser = argparse.ArgumentParser()

    # positional arguments
    parser.add_argument(
        "checkpoint_dir",
        type=str,
        help="path to the desired checkpoint folder, e.g., path/checkpoint-12",
    )
    parser.add_argument(
        "output_file",
        type=str,
        help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)",
    )

    # options
    parser.add_argument(
        "-t",
        "--tag",
        type=str,
        default=None,
        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1",
    )
    parser.add_argument(
        "--exclude_frozen_parameters",
        action='store_true',
        help="exclude frozen parameters",
    )
    parser.add_argument(
        "-d",
        "--debug",
        action='store_true',
        help="enable debug",
    )
    args = parser.parse_args()

    # Module-level flag consulted by the merge helpers (``if debug:``) to print extra details.
    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(
        args.checkpoint_dir,
        args.output_file,
        tag=args.tag,
        exclude_frozen_parameters=args.exclude_frozen_parameters,
    )