Spaces:
Runtime error
Runtime error
# LHM-1B | |
experiment: | |
type: lrm | |
seed: 42 | |
parent: video_human_benchmark | |
child: human-lrm-1B | |
model: | |
# image encoder | |
model_name: SapDinoLRMBHSD3_5 | |
encoder_type: dinov2_fusion | |
encoder_model_name: "dinov2_vitl14_reg" | |
encoder_feat_dim: 1024 # dinov2 embedding size 1024 | |
encoder_freeze: False | |
fine_encoder_type: sapiens | |
fine_encoder_model_name: "./pretrained_models/sapiens/pretrained/checkpoints/sapiens_1b/sapiens_1b_epoch_173_torchscript.pt2" # sapiens pretrained model path | |
fine_encoder_feat_dim: 1536 # sapiens embedding size 1536 | |
fine_encoder_freeze: True | |
use_face_id: True | |
# points embeddings | |
# num_pcl: 10240 | |
latent_query_points_type: "e2e_smplx_sub1" | |
pcl_dim: 1024 | |
facesr: True | |
# transformer | |
# # camera_embed_dim: 1024 | |
# transformer_dim: 512 | |
# transformer_layers: 12 | |
# transformer_heads: 8 | |
transformer_type: "sd3_mm_bh_cond" # multi-modal attention. | |
transformer_heads: 16 # 30 | |
transformer_dim: 1024 # 30 * 64=1920 | |
transformer_layers: 15 # 30 | |
tf_grad_ckpt: true | |
encoder_grad_ckpt: true | |
# for gs renderer | |
human_model_path: "./pretrained_models/human_model_files" | |
smplx_subdivide_num: 1 | |
smplx_type: "smplx_2" | |
gs_query_dim: 1024 | |
gs_use_rgb: True | |
gs_sh: 3 | |
dense_sample_pts: 40000 # 40,000 | |
gs_mlp_network_config: | |
n_neurons: 512 | |
n_hidden_layers: 2 | |
activation: silu | |
# gs_xyz_offset_max_step: 0.05625 # 1.8 / 32 | |
# gs_clip_scaling: 0.2 # avoid overly large spheres | |
gs_xyz_offset_max_step: 1. # previously 0.05625 (1.8 / 32) | |
gs_clip_scaling: [100, 0.01, 0.05, 3000] # [start, start_v, end_v, end] | |
expr_param_dim: 100 | |
shape_param_dim: 10 | |
fix_opacity: False | |
fix_rotation: False | |
cano_pose_type: 1 # 0: exavatar pose, 1: REC-MV pose | |
dataset: | |
subsets: | |
- name: video_human_flame | |
root_dirs: "./train_data/ClothVideo" | |
meta_path: | |
train: "./train_data/ClothVideo/label/valid_id_with_img_list_clean_30W.json" | |
val: "./train_data/ClothVideo/label/valid_id_with_img_list_val.json" | |
sample_rate: 1.0 | |
use_flame: True | |
src_head_size: 112 | |
- name: video_human_flame_v2 | |
root_dirs: "./train_data/ClothVideo" | |
meta_path: | |
train: "./train_data/ClothVideo/label/valid_synthetic_data_train.json" | |
val: "./train_data/ClothVideo/label/valid_synthetic_data_val.json" | |
sample_rate: 1.0 | |
use_flame: True | |
src_head_size: 112 | |
sample_side_views: 5 | |
source_image_res: 1024 | |
src_head_size: 112 | |
render_image: | |
low: 512 | |
high: 512 | |
region: null | |
num_train_workers: 4 | |
multiply: 16 # dino features | |
num_val_workers: 2 | |
pin_mem: true | |
repeat_num: 1 | |
train: | |
mixed_precision: bf16 # REPLACE THIS BASED ON GPU TYPE | |
find_unused_parameters: false | |
loss_func: | |
pixel_loss: l1 # L1 or MSE | |
ball_loss: | |
type: heuristic # heuristic ball_loss | |
group: | |
head: 1. | |
lower_body: 100. | |
upper_body: 1000. | |
hands: 10000. | |
offset_loss: | |
type: classical # classical offset_loss | |
group: | |
head: 1. | |
lower_body: 1. | |
upper_body: 100. | |
hands: 1000. | |
loss: | |
pixel_weight: 0.0 | |
masked_pixel_weight: 1.0 | |
masked_head_weight: 0.0 | |
perceptual_weight: 1.0 | |
# tv_weight: 5e-4 | |
tv_weight: -1 | |
mask_weight: 1.0 | |
face_id_weight: 0.05 | |
asap_weight: 10.0 # ball loss | |
acap_weight: 1000.0 # offset loss | |
optim: | |
lr: 4e-5 | |
weight_decay: 0.05 | |
beta1: 0.9 | |
beta2: 0.95 | |
clip_grad_norm: 0.1 # diffusion model | |
scheduler: | |
type: cosine | |
warmup_real_iters: 0 | |
batch_size: 2 # REPLACE THIS (PER GPU) | |
accum_steps: 1 # REPLACE THIS | |
epochs: 60 # REPLACE THIS | |
debug_global_steps: null | |
val: | |
batch_size: 2 | |
global_step_period: 1000 | |
debug_batches: 10 | |
saver: | |
auto_resume: True | |
checkpoint_root: None | |
checkpoint_global_steps: 1000 | |
checkpoint_keep_level: 60 | |
logger: | |
stream_level: WARNING | |
log_level: INFO | |
log_root: ./exps/logs | |
tracker_root: ./exps/trackers | |
enable_profiler: false | |
trackers: | |
- tensorboard | |
image_monitor: | |
train_global_steps: 100 | |
samples_per_log: 4 | |
compile: | |
suppress_errors: true | |
print_specializations: true | |
disable: true |