lzy committed on
Commit
189e351
·
1 Parent(s): 9de12d9

Add model weights

Browse files
Files changed (4) hide show
  1. checkpoints/post_ckpt.pt +3 -0
  2. config.json +60 -0
  3. config.yaml +56 -0
  4. dataset_statistics.json +136 -0
checkpoints/post_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33d897704f1d358b0c28dd244684587f48fc5cae72eb626c6ba471c69cf88958
3
+ size 33260216370
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 7,
3
+ "action_tokenizer_exist": false,
4
+ "class_dropout_prob": 0.0,
5
+ "data_root_dir": "/media/liuzhuoyang/data/rlbench/rlds/12tasks_selected_keyframe_pointcloud_0814",
6
+ "future_action_window_size": 0,
7
+ "hf_token": ".hf_token",
8
+ "image_aug": false,
9
+ "is_resume": false,
10
+ "llm_vision_layers": 8,
11
+ "load_all_data_for_training": true,
12
+ "past_action_window_size": 0,
13
+ "pretrained_checkpoint": "/media/liuzhuoyang/new_vla/Rec_Diff_beta/exp/exp_12tasks_selected_keyframe_pointcloud_0814_Pretrainrtx0818e1_FreezeVistrue_Window0_Difftrue_Recfalse_Contrastive_Vislayer8_1024_0403_0820/checkpoints/step-005415-epoch-300-loss=1.4414.pt",
14
+ "recon_image": true,
15
+ "recon_pointcloud": true,
16
+ "repeated_diffusion_steps": 4,
17
+ "resume_epoch": null,
18
+ "resume_step": null,
19
+ "run_id": "exp_12tasks_selected_keyframe_pointcloud_0814_Pretraindiff300_FreezeVistrue_Window0_Difftrue_PCtrue_Rectrue_Contrastive_Vislayer8_1024_0403_0822",
20
+ "run_id_note": null,
21
+ "run_root_dir": "/media/liuzhuoyang/new_vla/Rec_Diff_beta/exp",
22
+ "save_interval": 100,
23
+ "seed": 42,
24
+ "trackers": [
25
+ "jsonl",
26
+ "wandb"
27
+ ],
28
+ "use_contrastive": true,
29
+ "use_diff": true,
30
+ "use_ema": false,
31
+ "use_pointcloud": true,
32
+ "use_reconstruction": true,
33
+ "use_roi": false,
34
+ "vla": {
35
+ "base_vlm": "prism-dinosiglip-224px+7b",
36
+ "data_mix": "rlbench",
37
+ "enable_gradient_checkpointing": true,
38
+ "enable_mixed_precision_training": true,
39
+ "epochs": 300,
40
+ "expected_world_size": 32,
41
+ "freeze_llm_backbone": false,
42
+ "freeze_vision_tower": true,
43
+ "global_batch_size": 256,
44
+ "learning_rate": 2e-05,
45
+ "lr_scheduler_type": "constant",
46
+ "max_grad_norm": 1.0,
47
+ "max_steps": null,
48
+ "per_device_batch_size": 8,
49
+ "reduce_in_full_precision": true,
50
+ "shuffle_buffer_size": 10000,
51
+ "train_strategy": "fsdp-full-shard",
52
+ "type": "prism-dinosiglip-224px+oxe+diffusion",
53
+ "unfreeze_last_llm_layer": false,
54
+ "vla_id": "prism-dinosiglip-224px+oxe+diffusion",
55
+ "warmup_ratio": 0.0,
56
+ "weight_decay": 0.0
57
+ },
58
+ "wandb_entity": "liumail2023-peking-university",
59
+ "wandb_project": "one_model_vla_sft"
60
+ }
config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ action_dim: 7
2
+ action_tokenizer_exist: false
3
+ class_dropout_prob: 0.0
4
+ data_root_dir: /media/liuzhuoyang/data/rlbench/rlds/12tasks_selected_keyframe_pointcloud_0814
5
+ future_action_window_size: 0
6
+ hf_token: .hf_token
7
+ image_aug: false
8
+ is_resume: false
9
+ llm_vision_layers: 8
10
+ load_all_data_for_training: true
11
+ past_action_window_size: 0
12
+ pretrained_checkpoint: /media/liuzhuoyang/new_vla/Rec_Diff_beta/exp/exp_12tasks_selected_keyframe_pointcloud_0814_Pretrainrtx0818e1_FreezeVistrue_Window0_Difftrue_Recfalse_Contrastive_Vislayer8_1024_0403_0820/checkpoints/step-005415-epoch-300-loss=1.4414.pt
13
+ recon_image: true
14
+ recon_pointcloud: true
15
+ repeated_diffusion_steps: 4
16
+ resume_epoch: null
17
+ resume_step: null
18
+ run_id: exp_12tasks_selected_keyframe_pointcloud_0814_Pretraindiff300_FreezeVistrue_Window0_Difftrue_PCtrue_Rectrue_Contrastive_Vislayer8_1024_0403_0822
19
+ run_id_note: null
20
+ run_root_dir: /media/liuzhuoyang/new_vla/Rec_Diff_beta/exp
21
+ save_interval: 100
22
+ seed: 42
23
+ trackers:
24
+ - jsonl
25
+ - wandb
26
+ use_contrastive: true
27
+ use_diff: true
28
+ use_ema: false
29
+ use_pointcloud: true
30
+ use_reconstruction: true
31
+ use_roi: false
32
+ vla:
33
+ base_vlm: prism-dinosiglip-224px+7b
34
+ data_mix: rlbench
35
+ enable_gradient_checkpointing: true
36
+ enable_mixed_precision_training: true
37
+ epochs: 300
38
+ expected_world_size: 32
39
+ freeze_llm_backbone: false
40
+ freeze_vision_tower: true
41
+ global_batch_size: 256
42
+ learning_rate: 2.0e-05
43
+ lr_scheduler_type: constant
44
+ max_grad_norm: 1.0
45
+ max_steps: null
46
+ per_device_batch_size: 8
47
+ reduce_in_full_precision: true
48
+ shuffle_buffer_size: 10000
49
+ train_strategy: fsdp-full-shard
50
+ type: prism-dinosiglip-224px+oxe+diffusion
51
+ unfreeze_last_llm_layer: false
52
+ vla_id: prism-dinosiglip-224px+oxe+diffusion
53
+ warmup_ratio: 0.0
54
+ weight_decay: 0.0
55
+ wandb_entity: liumail2023-peking-university
56
+ wandb_project: one_model_vla_sft
dataset_statistics.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rlbench": {
3
+ "action": {
4
+ "mean": [
5
+ -0.010306132026016712,
6
+ -0.0055046346969902515,
7
+ -0.11197133362293243,
8
+ -0.0019046454690396786,
9
+ -0.15908075869083405,
10
+ 0.1675104796886444,
11
+ 0.39728033542633057
12
+ ],
13
+ "std": [
14
+ 0.08434142917394638,
15
+ 0.14875024557113647,
16
+ 0.1996437907218933,
17
+ 2.636392831802368,
18
+ 0.6083221435546875,
19
+ 1.8673105239868164,
20
+ 0.48934081196784973
21
+ ],
22
+ "max": [
23
+ 0.24629077315330505,
24
+ 0.4493105411529541,
25
+ 0.2484073042869568,
26
+ 3.141592264175415,
27
+ 1.570451259613037,
28
+ 3.141460657119751,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.4184693694114685,
33
+ -0.4492553770542145,
34
+ -0.6604752540588379,
35
+ -3.1415884494781494,
36
+ -1.5704164505004883,
37
+ -3.1412742137908936,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.2869752472639084,
42
+ -0.4469184121489525,
43
+ -0.6007548034191131,
44
+ -3.141468975543976,
45
+ -1.5699386596679688,
46
+ -3.1044607901573182,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.14981620118021965,
51
+ 0.44710047245025636,
52
+ 0.24729809522628785,
53
+ 3.141501989364624,
54
+ 1.5696296310424804,
55
+ 3.110466537475586,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "proprio": {
69
+ "mean": [
70
+ 0.22678296267986298,
71
+ 0.011608771048486233,
72
+ 1.124215841293335,
73
+ -0.7795729637145996,
74
+ -0.1190786063671112,
75
+ -0.1414777636528015,
76
+ 0.5020920634269714
77
+ ],
78
+ "std": [
79
+ 0.09569656103849411,
80
+ 0.17490601539611816,
81
+ 0.2254764586687088,
82
+ 2.6655027866363525,
83
+ 0.5839682221412659,
84
+ 2.2652320861816406,
85
+ 0.49999505281448364
86
+ ],
87
+ "max": [
88
+ 0.5282712578773499,
89
+ 0.42229560017585754,
90
+ 1.4721031188964844,
91
+ 3.141592502593994,
92
+ 1.568900227546692,
93
+ 3.1415927410125732,
94
+ 1.0
95
+ ],
96
+ "min": [
97
+ -0.14719118177890778,
98
+ -0.43890005350112915,
99
+ 0.7954218983650208,
100
+ -3.1415927410125732,
101
+ -1.5704164505004883,
102
+ -3.1415927410125732,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ -0.05181949622929096,
107
+ -0.34075534880161285,
108
+ 0.7963145178556442,
109
+ -3.1415915489196777,
110
+ -1.5699386596679688,
111
+ -3.141591787338257,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.44086600691080097,
116
+ 0.3493700134754181,
117
+ 1.4720294535160066,
118
+ 3.1415910720825195,
119
+ 1.5673939752578736,
120
+ 3.141592025756836,
121
+ 1.0
122
+ ],
123
+ "mask": [
124
+ true,
125
+ true,
126
+ true,
127
+ true,
128
+ true,
129
+ true,
130
+ false
131
+ ]
132
+ },
133
+ "num_transitions": 4780,
134
+ "num_trajectories": 1200
135
+ }
136
+ }