lzy
committed on
Commit
·
189e351
1
Parent(s):
9de12d9
Add model weights
Browse files
- checkpoints/post_ckpt.pt +3 -0
- config.json +60 -0
- config.yaml +56 -0
- dataset_statistics.json +136 -0
checkpoints/post_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33d897704f1d358b0c28dd244684587f48fc5cae72eb626c6ba471c69cf88958
|
| 3 |
+
size 33260216370
|
config.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 7,
|
| 3 |
+
"action_tokenizer_exist": false,
|
| 4 |
+
"class_dropout_prob": 0.0,
|
| 5 |
+
"data_root_dir": "/media/liuzhuoyang/data/rlbench/rlds/12tasks_selected_keyframe_pointcloud_0814",
|
| 6 |
+
"future_action_window_size": 0,
|
| 7 |
+
"hf_token": ".hf_token",
|
| 8 |
+
"image_aug": false,
|
| 9 |
+
"is_resume": false,
|
| 10 |
+
"llm_vision_layers": 8,
|
| 11 |
+
"load_all_data_for_training": true,
|
| 12 |
+
"past_action_window_size": 0,
|
| 13 |
+
"pretrained_checkpoint": "/media/liuzhuoyang/new_vla/Rec_Diff_beta/exp/exp_12tasks_selected_keyframe_pointcloud_0814_Pretrainrtx0818e1_FreezeVistrue_Window0_Difftrue_Recfalse_Contrastive_Vislayer8_1024_0403_0820/checkpoints/step-005415-epoch-300-loss=1.4414.pt",
|
| 14 |
+
"recon_image": true,
|
| 15 |
+
"recon_pointcloud": true,
|
| 16 |
+
"repeated_diffusion_steps": 4,
|
| 17 |
+
"resume_epoch": null,
|
| 18 |
+
"resume_step": null,
|
| 19 |
+
"run_id": "exp_12tasks_selected_keyframe_pointcloud_0814_Pretraindiff300_FreezeVistrue_Window0_Difftrue_PCtrue_Rectrue_Contrastive_Vislayer8_1024_0403_0822",
|
| 20 |
+
"run_id_note": null,
|
| 21 |
+
"run_root_dir": "/media/liuzhuoyang/new_vla/Rec_Diff_beta/exp",
|
| 22 |
+
"save_interval": 100,
|
| 23 |
+
"seed": 42,
|
| 24 |
+
"trackers": [
|
| 25 |
+
"jsonl",
|
| 26 |
+
"wandb"
|
| 27 |
+
],
|
| 28 |
+
"use_contrastive": true,
|
| 29 |
+
"use_diff": true,
|
| 30 |
+
"use_ema": false,
|
| 31 |
+
"use_pointcloud": true,
|
| 32 |
+
"use_reconstruction": true,
|
| 33 |
+
"use_roi": false,
|
| 34 |
+
"vla": {
|
| 35 |
+
"base_vlm": "prism-dinosiglip-224px+7b",
|
| 36 |
+
"data_mix": "rlbench",
|
| 37 |
+
"enable_gradient_checkpointing": true,
|
| 38 |
+
"enable_mixed_precision_training": true,
|
| 39 |
+
"epochs": 300,
|
| 40 |
+
"expected_world_size": 32,
|
| 41 |
+
"freeze_llm_backbone": false,
|
| 42 |
+
"freeze_vision_tower": true,
|
| 43 |
+
"global_batch_size": 256,
|
| 44 |
+
"learning_rate": 2e-05,
|
| 45 |
+
"lr_scheduler_type": "constant",
|
| 46 |
+
"max_grad_norm": 1.0,
|
| 47 |
+
"max_steps": null,
|
| 48 |
+
"per_device_batch_size": 8,
|
| 49 |
+
"reduce_in_full_precision": true,
|
| 50 |
+
"shuffle_buffer_size": 10000,
|
| 51 |
+
"train_strategy": "fsdp-full-shard",
|
| 52 |
+
"type": "prism-dinosiglip-224px+oxe+diffusion",
|
| 53 |
+
"unfreeze_last_llm_layer": false,
|
| 54 |
+
"vla_id": "prism-dinosiglip-224px+oxe+diffusion",
|
| 55 |
+
"warmup_ratio": 0.0,
|
| 56 |
+
"weight_decay": 0.0
|
| 57 |
+
},
|
| 58 |
+
"wandb_entity": "liumail2023-peking-university",
|
| 59 |
+
"wandb_project": "one_model_vla_sft"
|
| 60 |
+
}
|
config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
action_dim: 7
|
| 2 |
+
action_tokenizer_exist: false
|
| 3 |
+
class_dropout_prob: 0.0
|
| 4 |
+
data_root_dir: /media/liuzhuoyang/data/rlbench/rlds/12tasks_selected_keyframe_pointcloud_0814
|
| 5 |
+
future_action_window_size: 0
|
| 6 |
+
hf_token: .hf_token
|
| 7 |
+
image_aug: false
|
| 8 |
+
is_resume: false
|
| 9 |
+
llm_vision_layers: 8
|
| 10 |
+
load_all_data_for_training: true
|
| 11 |
+
past_action_window_size: 0
|
| 12 |
+
pretrained_checkpoint: /media/liuzhuoyang/new_vla/Rec_Diff_beta/exp/exp_12tasks_selected_keyframe_pointcloud_0814_Pretrainrtx0818e1_FreezeVistrue_Window0_Difftrue_Recfalse_Contrastive_Vislayer8_1024_0403_0820/checkpoints/step-005415-epoch-300-loss=1.4414.pt
|
| 13 |
+
recon_image: true
|
| 14 |
+
recon_pointcloud: true
|
| 15 |
+
repeated_diffusion_steps: 4
|
| 16 |
+
resume_epoch: null
|
| 17 |
+
resume_step: null
|
| 18 |
+
run_id: exp_12tasks_selected_keyframe_pointcloud_0814_Pretraindiff300_FreezeVistrue_Window0_Difftrue_PCtrue_Rectrue_Contrastive_Vislayer8_1024_0403_0822
|
| 19 |
+
run_id_note: null
|
| 20 |
+
run_root_dir: /media/liuzhuoyang/new_vla/Rec_Diff_beta/exp
|
| 21 |
+
save_interval: 100
|
| 22 |
+
seed: 42
|
| 23 |
+
trackers:
|
| 24 |
+
- jsonl
|
| 25 |
+
- wandb
|
| 26 |
+
use_contrastive: true
|
| 27 |
+
use_diff: true
|
| 28 |
+
use_ema: false
|
| 29 |
+
use_pointcloud: true
|
| 30 |
+
use_reconstruction: true
|
| 31 |
+
use_roi: false
|
| 32 |
+
vla:
|
| 33 |
+
base_vlm: prism-dinosiglip-224px+7b
|
| 34 |
+
data_mix: rlbench
|
| 35 |
+
enable_gradient_checkpointing: true
|
| 36 |
+
enable_mixed_precision_training: true
|
| 37 |
+
epochs: 300
|
| 38 |
+
expected_world_size: 32
|
| 39 |
+
freeze_llm_backbone: false
|
| 40 |
+
freeze_vision_tower: true
|
| 41 |
+
global_batch_size: 256
|
| 42 |
+
learning_rate: 2.0e-05
|
| 43 |
+
lr_scheduler_type: constant
|
| 44 |
+
max_grad_norm: 1.0
|
| 45 |
+
max_steps: null
|
| 46 |
+
per_device_batch_size: 8
|
| 47 |
+
reduce_in_full_precision: true
|
| 48 |
+
shuffle_buffer_size: 10000
|
| 49 |
+
train_strategy: fsdp-full-shard
|
| 50 |
+
type: prism-dinosiglip-224px+oxe+diffusion
|
| 51 |
+
unfreeze_last_llm_layer: false
|
| 52 |
+
vla_id: prism-dinosiglip-224px+oxe+diffusion
|
| 53 |
+
warmup_ratio: 0.0
|
| 54 |
+
weight_decay: 0.0
|
| 55 |
+
wandb_entity: liumail2023-peking-university
|
| 56 |
+
wandb_project: one_model_vla_sft
|
dataset_statistics.json
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rlbench": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
-0.010306132026016712,
|
| 6 |
+
-0.0055046346969902515,
|
| 7 |
+
-0.11197133362293243,
|
| 8 |
+
-0.0019046454690396786,
|
| 9 |
+
-0.15908075869083405,
|
| 10 |
+
0.1675104796886444,
|
| 11 |
+
0.39728033542633057
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.08434142917394638,
|
| 15 |
+
0.14875024557113647,
|
| 16 |
+
0.1996437907218933,
|
| 17 |
+
2.636392831802368,
|
| 18 |
+
0.6083221435546875,
|
| 19 |
+
1.8673105239868164,
|
| 20 |
+
0.48934081196784973
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
0.24629077315330505,
|
| 24 |
+
0.4493105411529541,
|
| 25 |
+
0.2484073042869568,
|
| 26 |
+
3.141592264175415,
|
| 27 |
+
1.570451259613037,
|
| 28 |
+
3.141460657119751,
|
| 29 |
+
1.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
-0.4184693694114685,
|
| 33 |
+
-0.4492553770542145,
|
| 34 |
+
-0.6604752540588379,
|
| 35 |
+
-3.1415884494781494,
|
| 36 |
+
-1.5704164505004883,
|
| 37 |
+
-3.1412742137908936,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
-0.2869752472639084,
|
| 42 |
+
-0.4469184121489525,
|
| 43 |
+
-0.6007548034191131,
|
| 44 |
+
-3.141468975543976,
|
| 45 |
+
-1.5699386596679688,
|
| 46 |
+
-3.1044607901573182,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
0.14981620118021965,
|
| 51 |
+
0.44710047245025636,
|
| 52 |
+
0.24729809522628785,
|
| 53 |
+
3.141501989364624,
|
| 54 |
+
1.5696296310424804,
|
| 55 |
+
3.110466537475586,
|
| 56 |
+
1.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
false
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"proprio": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.22678296267986298,
|
| 71 |
+
0.011608771048486233,
|
| 72 |
+
1.124215841293335,
|
| 73 |
+
-0.7795729637145996,
|
| 74 |
+
-0.1190786063671112,
|
| 75 |
+
-0.1414777636528015,
|
| 76 |
+
0.5020920634269714
|
| 77 |
+
],
|
| 78 |
+
"std": [
|
| 79 |
+
0.09569656103849411,
|
| 80 |
+
0.17490601539611816,
|
| 81 |
+
0.2254764586687088,
|
| 82 |
+
2.6655027866363525,
|
| 83 |
+
0.5839682221412659,
|
| 84 |
+
2.2652320861816406,
|
| 85 |
+
0.49999505281448364
|
| 86 |
+
],
|
| 87 |
+
"max": [
|
| 88 |
+
0.5282712578773499,
|
| 89 |
+
0.42229560017585754,
|
| 90 |
+
1.4721031188964844,
|
| 91 |
+
3.141592502593994,
|
| 92 |
+
1.568900227546692,
|
| 93 |
+
3.1415927410125732,
|
| 94 |
+
1.0
|
| 95 |
+
],
|
| 96 |
+
"min": [
|
| 97 |
+
-0.14719118177890778,
|
| 98 |
+
-0.43890005350112915,
|
| 99 |
+
0.7954218983650208,
|
| 100 |
+
-3.1415927410125732,
|
| 101 |
+
-1.5704164505004883,
|
| 102 |
+
-3.1415927410125732,
|
| 103 |
+
0.0
|
| 104 |
+
],
|
| 105 |
+
"q01": [
|
| 106 |
+
-0.05181949622929096,
|
| 107 |
+
-0.34075534880161285,
|
| 108 |
+
0.7963145178556442,
|
| 109 |
+
-3.1415915489196777,
|
| 110 |
+
-1.5699386596679688,
|
| 111 |
+
-3.141591787338257,
|
| 112 |
+
0.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.44086600691080097,
|
| 116 |
+
0.3493700134754181,
|
| 117 |
+
1.4720294535160066,
|
| 118 |
+
3.1415910720825195,
|
| 119 |
+
1.5673939752578736,
|
| 120 |
+
3.141592025756836,
|
| 121 |
+
1.0
|
| 122 |
+
],
|
| 123 |
+
"mask": [
|
| 124 |
+
true,
|
| 125 |
+
true,
|
| 126 |
+
true,
|
| 127 |
+
true,
|
| 128 |
+
true,
|
| 129 |
+
true,
|
| 130 |
+
false
|
| 131 |
+
]
|
| 132 |
+
},
|
| 133 |
+
"num_transitions": 4780,
|
| 134 |
+
"num_trajectories": 1200
|
| 135 |
+
}
|
| 136 |
+
}
|