Image-to-3D
Hunyuan3D-2
Diffusers
Safetensors
English
Chinese
text-to-3d
File size: 2,078 Bytes
07d6dc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Denoiser section: `target` names a class by dotted path and `params` lists
# the settings stored for it.
model:
  target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain
  params:
    # Sizes. The anchor on input_size is aliased by vae.params.num_latents,
    # so changing 4096 here changes the VAE value as well.
    input_size: &num_latents 4096
    in_channels: 64
    hidden_size: 2048
    depth: 21
    num_heads: 16
    context_dim: 1024
    # NOTE(review): 1370 equals (518 / 14) ** 2 + 1 using the conditioner's
    # image_size and patch_size - presumably patch tokens plus one class
    # token; confirm before editing either section.
    text_len: 1370

    # Switches and variants.
    qkv_bias: false
    qk_norm: true
    qk_norm_type: 'rms'
    use_pos_emb: false
    use_attention_pooling: false
    with_decoupled_ca: false

    # MoE-related values.
    num_moe_layers: 6
    num_experts: 8
    moe_top_k: 2

# Shape autoencoder section: class path plus the settings stored for it.
vae:
  target: hy3dshape.models.autoencoders.ShapeVAE
  params:
    # Latent settings. num_latents is an alias of the anchor declared on
    # model.params.input_size, so it is not a separate literal.
    num_latents: *num_latents
    embed_dim: 64
    scale_factor: 1.0039506158752403

    # Network width, head count and layer counts.
    width: 1024
    heads: 16
    num_encoder_layers: 8
    num_decoder_layers: 16
    qk_norm: true
    qkv_bias: false

    # Frequency-related settings.
    num_freqs: 8
    include_pi: false

    # geo_decoder_* settings.
    geo_decoder_ln_post: true
    geo_decoder_mlp_expand_ratio: 4
    geo_decoder_downsample_ratio: 1

    # Point-related settings.
    # NOTE(review): presumably describes the sampled input point cloud -
    # confirm against ShapeVAE.
    pc_size: 81920
    pc_sharpedge_size: 0
    point_feats: 4

# Image conditioner section: class path plus a nested description of the main
# image encoder. Indentation is a uniform two spaces per level, matching the
# rest of this file.
conditioner:
  target: hy3dshape.models.conditioner.SingleImageEncoder
  params:
    main_image_encoder:
      # dino large
      type: DinoImageEncoder
      kwargs:
        # Encoder configuration (model_type dinov2, 24 hidden layers,
        # hidden size 1024, 14-pixel patches).
        config:
          attention_probs_dropout_prob: 0.0
          drop_path_rate: 0.0
          hidden_act: gelu
          hidden_dropout_prob: 0.0
          hidden_size: 1024
          image_size: 518
          initializer_range: 0.02
          # Keep the decimal point: YAML 1.1 loaders such as PyYAML only
          # resolve exponent notation as a float when a dot is present, so
          # a bare 1e-6 would load as a string there.
          layer_norm_eps: 1.e-6
          layerscale_value: 1.0
          mlp_ratio: 4
          model_type: dinov2
          num_attention_heads: 16
          num_channels: 3
          num_hidden_layers: 24
          patch_size: 14
          qkv_bias: true
          torch_dtype: float32
          use_swiglu_ffn: false
        # image_size is stated both inside config and here; both are 518.
        # NOTE(review): presumably they must stay equal - confirm against
        # DinoImageEncoder.
        image_size: 518
        use_cls_token: true

# Scheduler section: class path plus its single stored setting.
scheduler:
  target: hy3dshape.schedulers.FlowMatchEulerDiscreteScheduler
  params:
    # NOTE(review): presumably the number of timesteps used in training -
    # confirm against the scheduler class.
    num_train_timesteps: 1000

# Image preprocessing section: class path plus two stored settings.
image_processor:
  target: hy3dshape.preprocessors.ImageProcessorV2
  params:
    # NOTE(review): 512 here differs from the 518 image_size in the
    # conditioner section; presumably the encoder resizes again - confirm.
    size: 512
    border_ratio: 0.15

# Pipeline section: only a class path is given; there is no params mapping.
pipeline:
  target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline