Dataset collected with uniform random block spawn; add trained model fine-tuned from 'lerobot/smolvla_base'
Browse files
- config.json +88 -0
- model.safetensors +3 -0
- train_config.json +200 -0
    	
        config.json
    ADDED
    
    | @@ -0,0 +1,88 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "type": "smolvla",
         | 
| 3 | 
            +
                "n_obs_steps": 1,
         | 
| 4 | 
            +
                "normalization_mapping": {
         | 
| 5 | 
            +
                    "VISUAL": "IDENTITY",
         | 
| 6 | 
            +
                    "STATE": "MEAN_STD",
         | 
| 7 | 
            +
                    "ACTION": "MEAN_STD"
         | 
| 8 | 
            +
                },
         | 
| 9 | 
            +
                "input_features": {
         | 
| 10 | 
            +
                    "observation.image": {
         | 
| 11 | 
            +
                        "type": "VISUAL",
         | 
| 12 | 
            +
                        "shape": [
         | 
| 13 | 
            +
                            3,
         | 
| 14 | 
            +
                            256,
         | 
| 15 | 
            +
                            256
         | 
| 16 | 
            +
                        ]
         | 
| 17 | 
            +
                    },
         | 
| 18 | 
            +
                    "observation.wrist_image": {
         | 
| 19 | 
            +
                        "type": "VISUAL",
         | 
| 20 | 
            +
                        "shape": [
         | 
| 21 | 
            +
                            3,
         | 
| 22 | 
            +
                            256,
         | 
| 23 | 
            +
                            256
         | 
| 24 | 
            +
                        ]
         | 
| 25 | 
            +
                    },
         | 
| 26 | 
            +
                    "observation.state": {
         | 
| 27 | 
            +
                        "type": "STATE",
         | 
| 28 | 
            +
                        "shape": [
         | 
| 29 | 
            +
                            6
         | 
| 30 | 
            +
                        ]
         | 
| 31 | 
            +
                    }
         | 
| 32 | 
            +
                },
         | 
| 33 | 
            +
                "output_features": {
         | 
| 34 | 
            +
                    "action": {
         | 
| 35 | 
            +
                        "type": "ACTION",
         | 
| 36 | 
            +
                        "shape": [
         | 
| 37 | 
            +
                            7
         | 
| 38 | 
            +
                        ]
         | 
| 39 | 
            +
                    }
         | 
| 40 | 
            +
                },
         | 
| 41 | 
            +
                "device": "cuda",
         | 
| 42 | 
            +
                "use_amp": false,
         | 
| 43 | 
            +
                "push_to_hub": false,
         | 
| 44 | 
            +
                "repo_id": null,
         | 
| 45 | 
            +
                "private": null,
         | 
| 46 | 
            +
                "tags": null,
         | 
| 47 | 
            +
                "license": null,
         | 
| 48 | 
            +
                "chunk_size": 5,
         | 
| 49 | 
            +
                "n_action_steps": 5,
         | 
| 50 | 
            +
                "max_state_dim": 32,
         | 
| 51 | 
            +
                "max_action_dim": 32,
         | 
| 52 | 
            +
                "resize_imgs_with_padding": [
         | 
| 53 | 
            +
                    512,
         | 
| 54 | 
            +
                    512
         | 
| 55 | 
            +
                ],
         | 
| 56 | 
            +
                "empty_cameras": 0,
         | 
| 57 | 
            +
                "adapt_to_pi_aloha": false,
         | 
| 58 | 
            +
                "use_delta_joint_actions_aloha": false,
         | 
| 59 | 
            +
                "tokenizer_max_length": 48,
         | 
| 60 | 
            +
                "num_steps": 10,
         | 
| 61 | 
            +
                "use_cache": true,
         | 
| 62 | 
            +
                "freeze_vision_encoder": true,
         | 
| 63 | 
            +
                "train_expert_only": true,
         | 
| 64 | 
            +
                "train_state_proj": true,
         | 
| 65 | 
            +
                "optimizer_lr": 0.0001,
         | 
| 66 | 
            +
                "optimizer_betas": [
         | 
| 67 | 
            +
                    0.9,
         | 
| 68 | 
            +
                    0.95
         | 
| 69 | 
            +
                ],
         | 
| 70 | 
            +
                "optimizer_eps": 1e-08,
         | 
| 71 | 
            +
                "optimizer_weight_decay": 1e-10,
         | 
| 72 | 
            +
                "optimizer_grad_clip_norm": 10,
         | 
| 73 | 
            +
                "scheduler_warmup_steps": 1000,
         | 
| 74 | 
            +
                "scheduler_decay_steps": 30000,
         | 
| 75 | 
            +
                "scheduler_decay_lr": 2.5e-06,
         | 
| 76 | 
            +
                "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
         | 
| 77 | 
            +
                "load_vlm_weights": false,
         | 
| 78 | 
            +
                "add_image_special_tokens": false,
         | 
| 79 | 
            +
                "attention_mode": "cross_attn",
         | 
| 80 | 
            +
                "prefix_length": -1,
         | 
| 81 | 
            +
                "pad_language_to": "longest",
         | 
| 82 | 
            +
                "num_expert_layers": -1,
         | 
| 83 | 
            +
                "num_vlm_layers": 16,
         | 
| 84 | 
            +
                "self_attn_every_n_layers": 2,
         | 
| 85 | 
            +
                "expert_width_multiplier": 0.75,
         | 
| 86 | 
            +
                "min_period": 0.004,
         | 
| 87 | 
            +
                "max_period": 4.0
         | 
| 88 | 
            +
            }
         | 
    	
        model.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:016978679a18037585608825691ba9e5280ebc181f95c023548a3855ee6ed114
         | 
| 3 | 
            +
            size 1197790032
         | 
    	
        train_config.json
    ADDED
    
    | @@ -0,0 +1,200 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "dataset": {
         | 
| 3 | 
            +
                    "repo_id": "DragonHu/remove_red_block_from_plate_UR5_smolvla_mujoco",
         | 
| 4 | 
            +
                    "root": "./demo_data_language",
         | 
| 5 | 
            +
                    "episodes": null,
         | 
| 6 | 
            +
                    "image_transforms": {
         | 
| 7 | 
            +
                        "enable": false,
         | 
| 8 | 
            +
                        "max_num_transforms": 3,
         | 
| 9 | 
            +
                        "random_order": false,
         | 
| 10 | 
            +
                        "tfs": {
         | 
| 11 | 
            +
                            "brightness": {
         | 
| 12 | 
            +
                                "weight": 1.0,
         | 
| 13 | 
            +
                                "type": "ColorJitter",
         | 
| 14 | 
            +
                                "kwargs": {
         | 
| 15 | 
            +
                                    "brightness": [
         | 
| 16 | 
            +
                                        0.8,
         | 
| 17 | 
            +
                                        1.2
         | 
| 18 | 
            +
                                    ]
         | 
| 19 | 
            +
                                }
         | 
| 20 | 
            +
                            },
         | 
| 21 | 
            +
                            "contrast": {
         | 
| 22 | 
            +
                                "weight": 1.0,
         | 
| 23 | 
            +
                                "type": "ColorJitter",
         | 
| 24 | 
            +
                                "kwargs": {
         | 
| 25 | 
            +
                                    "contrast": [
         | 
| 26 | 
            +
                                        0.8,
         | 
| 27 | 
            +
                                        1.2
         | 
| 28 | 
            +
                                    ]
         | 
| 29 | 
            +
                                }
         | 
| 30 | 
            +
                            },
         | 
| 31 | 
            +
                            "saturation": {
         | 
| 32 | 
            +
                                "weight": 1.0,
         | 
| 33 | 
            +
                                "type": "ColorJitter",
         | 
| 34 | 
            +
                                "kwargs": {
         | 
| 35 | 
            +
                                    "saturation": [
         | 
| 36 | 
            +
                                        0.5,
         | 
| 37 | 
            +
                                        1.5
         | 
| 38 | 
            +
                                    ]
         | 
| 39 | 
            +
                                }
         | 
| 40 | 
            +
                            },
         | 
| 41 | 
            +
                            "hue": {
         | 
| 42 | 
            +
                                "weight": 1.0,
         | 
| 43 | 
            +
                                "type": "ColorJitter",
         | 
| 44 | 
            +
                                "kwargs": {
         | 
| 45 | 
            +
                                    "hue": [
         | 
| 46 | 
            +
                                        -0.05,
         | 
| 47 | 
            +
                                        0.05
         | 
| 48 | 
            +
                                    ]
         | 
| 49 | 
            +
                                }
         | 
| 50 | 
            +
                            },
         | 
| 51 | 
            +
                            "sharpness": {
         | 
| 52 | 
            +
                                "weight": 1.0,
         | 
| 53 | 
            +
                                "type": "SharpnessJitter",
         | 
| 54 | 
            +
                                "kwargs": {
         | 
| 55 | 
            +
                                    "sharpness": [
         | 
| 56 | 
            +
                                        0.5,
         | 
| 57 | 
            +
                                        1.5
         | 
| 58 | 
            +
                                    ]
         | 
| 59 | 
            +
                                }
         | 
| 60 | 
            +
                            }
         | 
| 61 | 
            +
                        }
         | 
| 62 | 
            +
                    },
         | 
| 63 | 
            +
                    "revision": null,
         | 
| 64 | 
            +
                    "use_imagenet_stats": true,
         | 
| 65 | 
            +
                    "video_backend": "torchcodec"
         | 
| 66 | 
            +
                },
         | 
| 67 | 
            +
                "env": null,
         | 
| 68 | 
            +
                "policy": {
         | 
| 69 | 
            +
                    "type": "smolvla",
         | 
| 70 | 
            +
                    "n_obs_steps": 1,
         | 
| 71 | 
            +
                    "normalization_mapping": {
         | 
| 72 | 
            +
                        "VISUAL": "IDENTITY",
         | 
| 73 | 
            +
                        "STATE": "MEAN_STD",
         | 
| 74 | 
            +
                        "ACTION": "MEAN_STD"
         | 
| 75 | 
            +
                    },
         | 
| 76 | 
            +
                    "input_features": {
         | 
| 77 | 
            +
                        "observation.image": {
         | 
| 78 | 
            +
                            "type": "VISUAL",
         | 
| 79 | 
            +
                            "shape": [
         | 
| 80 | 
            +
                                3,
         | 
| 81 | 
            +
                                256,
         | 
| 82 | 
            +
                                256
         | 
| 83 | 
            +
                            ]
         | 
| 84 | 
            +
                        },
         | 
| 85 | 
            +
                        "observation.wrist_image": {
         | 
| 86 | 
            +
                            "type": "VISUAL",
         | 
| 87 | 
            +
                            "shape": [
         | 
| 88 | 
            +
                                3,
         | 
| 89 | 
            +
                                256,
         | 
| 90 | 
            +
                                256
         | 
| 91 | 
            +
                            ]
         | 
| 92 | 
            +
                        },
         | 
| 93 | 
            +
                        "observation.state": {
         | 
| 94 | 
            +
                            "type": "STATE",
         | 
| 95 | 
            +
                            "shape": [
         | 
| 96 | 
            +
                                6
         | 
| 97 | 
            +
                            ]
         | 
| 98 | 
            +
                        }
         | 
| 99 | 
            +
                    },
         | 
| 100 | 
            +
                    "output_features": {
         | 
| 101 | 
            +
                        "action": {
         | 
| 102 | 
            +
                            "type": "ACTION",
         | 
| 103 | 
            +
                            "shape": [
         | 
| 104 | 
            +
                                7
         | 
| 105 | 
            +
                            ]
         | 
| 106 | 
            +
                        }
         | 
| 107 | 
            +
                    },
         | 
| 108 | 
            +
                    "device": "cuda",
         | 
| 109 | 
            +
                    "use_amp": false,
         | 
| 110 | 
            +
                    "push_to_hub": false,
         | 
| 111 | 
            +
                    "repo_id": null,
         | 
| 112 | 
            +
                    "private": null,
         | 
| 113 | 
            +
                    "tags": null,
         | 
| 114 | 
            +
                    "license": null,
         | 
| 115 | 
            +
                    "chunk_size": 5,
         | 
| 116 | 
            +
                    "n_action_steps": 5,
         | 
| 117 | 
            +
                    "max_state_dim": 32,
         | 
| 118 | 
            +
                    "max_action_dim": 32,
         | 
| 119 | 
            +
                    "resize_imgs_with_padding": [
         | 
| 120 | 
            +
                        512,
         | 
| 121 | 
            +
                        512
         | 
| 122 | 
            +
                    ],
         | 
| 123 | 
            +
                    "empty_cameras": 0,
         | 
| 124 | 
            +
                    "adapt_to_pi_aloha": false,
         | 
| 125 | 
            +
                    "use_delta_joint_actions_aloha": false,
         | 
| 126 | 
            +
                    "tokenizer_max_length": 48,
         | 
| 127 | 
            +
                    "num_steps": 10,
         | 
| 128 | 
            +
                    "use_cache": true,
         | 
| 129 | 
            +
                    "freeze_vision_encoder": true,
         | 
| 130 | 
            +
                    "train_expert_only": true,
         | 
| 131 | 
            +
                    "train_state_proj": true,
         | 
| 132 | 
            +
                    "optimizer_lr": 0.0001,
         | 
| 133 | 
            +
                    "optimizer_betas": [
         | 
| 134 | 
            +
                        0.9,
         | 
| 135 | 
            +
                        0.95
         | 
| 136 | 
            +
                    ],
         | 
| 137 | 
            +
                    "optimizer_eps": 1e-08,
         | 
| 138 | 
            +
                    "optimizer_weight_decay": 1e-10,
         | 
| 139 | 
            +
                    "optimizer_grad_clip_norm": 10,
         | 
| 140 | 
            +
                    "scheduler_warmup_steps": 1000,
         | 
| 141 | 
            +
                    "scheduler_decay_steps": 30000,
         | 
| 142 | 
            +
                    "scheduler_decay_lr": 2.5e-06,
         | 
| 143 | 
            +
                    "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
         | 
| 144 | 
            +
                    "load_vlm_weights": false,
         | 
| 145 | 
            +
                    "add_image_special_tokens": false,
         | 
| 146 | 
            +
                    "attention_mode": "cross_attn",
         | 
| 147 | 
            +
                    "prefix_length": -1,
         | 
| 148 | 
            +
                    "pad_language_to": "longest",
         | 
| 149 | 
            +
                    "num_expert_layers": -1,
         | 
| 150 | 
            +
                    "num_vlm_layers": 16,
         | 
| 151 | 
            +
                    "self_attn_every_n_layers": 2,
         | 
| 152 | 
            +
                    "expert_width_multiplier": 0.75,
         | 
| 153 | 
            +
                    "min_period": 0.004,
         | 
| 154 | 
            +
                    "max_period": 4.0
         | 
| 155 | 
            +
                },
         | 
| 156 | 
            +
                "output_dir": "ckpt/smolvla_omy",
         | 
| 157 | 
            +
                "job_name": "smolvla_remove_block",
         | 
| 158 | 
            +
                "resume": false,
         | 
| 159 | 
            +
                "seed": 42,
         | 
| 160 | 
            +
                "num_workers": 8,
         | 
| 161 | 
            +
                "batch_size": 16,
         | 
| 162 | 
            +
                "steps": 20000,
         | 
| 163 | 
            +
                "eval_freq": -1,
         | 
| 164 | 
            +
                "log_freq": 50,
         | 
| 165 | 
            +
                "save_checkpoint": true,
         | 
| 166 | 
            +
                "save_freq": 10000,
         | 
| 167 | 
            +
                "use_policy_training_preset": true,
         | 
| 168 | 
            +
                "optimizer": {
         | 
| 169 | 
            +
                    "type": "adamw",
         | 
| 170 | 
            +
                    "lr": 0.0001,
         | 
| 171 | 
            +
                    "weight_decay": 1e-10,
         | 
| 172 | 
            +
                    "grad_clip_norm": 10,
         | 
| 173 | 
            +
                    "betas": [
         | 
| 174 | 
            +
                        0.9,
         | 
| 175 | 
            +
                        0.95
         | 
| 176 | 
            +
                    ],
         | 
| 177 | 
            +
                    "eps": 1e-08
         | 
| 178 | 
            +
                },
         | 
| 179 | 
            +
                "scheduler": {
         | 
| 180 | 
            +
                    "type": "cosine_decay_with_warmup",
         | 
| 181 | 
            +
                    "num_warmup_steps": 1000,
         | 
| 182 | 
            +
                    "num_decay_steps": 30000,
         | 
| 183 | 
            +
                    "peak_lr": 0.0001,
         | 
| 184 | 
            +
                    "decay_lr": 2.5e-06
         | 
| 185 | 
            +
                },
         | 
| 186 | 
            +
                "eval": {
         | 
| 187 | 
            +
                    "n_episodes": 50,
         | 
| 188 | 
            +
                    "batch_size": 50,
         | 
| 189 | 
            +
                    "use_async_envs": false
         | 
| 190 | 
            +
                },
         | 
| 191 | 
            +
                "wandb": {
         | 
| 192 | 
            +
                    "enable": true,
         | 
| 193 | 
            +
                    "disable_artifact": true,
         | 
| 194 | 
            +
                    "project": "smolvla_pnp_remove_block_random_uniform",
         | 
| 195 | 
            +
                    "entity": "wenxuan-hu97-centralesup-lec",
         | 
| 196 | 
            +
                    "notes": null,
         | 
| 197 | 
            +
                    "run_id": "iabuxg82",
         | 
| 198 | 
            +
                    "mode": null
         | 
| 199 | 
            +
                }
         | 
| 200 | 
            +
            }
         | 
