reachomk committed on
Commit 2c5d89e · verified · 1 Parent(s): c8c2caa

Upload folder using huggingface_hub

Files changed (7)
  1. .gitattributes +1 -0
  2. .gitignore +1 -0
  3. README.md +9 -0
  4. arguments.txt +31 -0
  5. config.json +28 -0
  6. mae-teaser.png +3 -0
  7. model.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ mae-teaser.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
README.md ADDED
@@ -0,0 +1,9 @@
+ # Generative Models Enable Generalizable Instance Segmentation
+ <img src='mae-teaser.png'/>
+ This is the official model release for the ImageNet-1k pretrained Masked Autoencoder (MAE-H) variant of our `gen2seg` generative instance segmenter. It is the same checkpoint we used to generate the figures in the paper.
+
+ **The segmentations in the image above differ from those in the paper teaser because they are generated with this MAE-H model instead of our SD model.** If you are looking for our Stable Diffusion 2 (SD) variant, you can find it at https://huggingface.co/reachomk/gen2seg-sd
+
+ Please see our website https://reachomk.github.io/gen2seg for our paper, demos, and additional qualitative samples.
+
+ You can run this model via our GitHub repo: https://github.com/reachomk/gen2seg or our Hugging Face Space: https://huggingface.co/spaces/reachomk/gen2seg-mae-h
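For readers who want to load this checkpoint outside the GitHub repo, here is a minimal sketch using Hugging Face `transformers`. It assumes the hub repo id is `reachomk/gen2seg-mae-h` and that the checkpoint loads directly as `ViTMAEForPreTraining` (as the `config.json` in this commit indicates); the segmentation-specific post-processing used in the paper lives in the gen2seg GitHub repo, not in this snippet.

```python
# Minimal sketch (not the authors' inference pipeline): load the checkpoint
# and run one image through the MAE encoder-decoder.
# Assumptions: hub repo id "reachomk/gen2seg-mae-h"; the base image processor
# of facebook/vit-mae-huge is appropriate for preprocessing; "example.jpg" is
# a placeholder input image.
import torch
from PIL import Image
from transformers import AutoImageProcessor, ViTMAEForPreTraining

model = ViTMAEForPreTraining.from_pretrained("reachomk/gen2seg-mae-h")
processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-huge")

image = Image.open("example.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# logits: (batch, num_patches, patch_size**2 * num_channels); with
# mask_ratio 0.0 no patches are masked, so unpatchify gives a full
# image-shaped prediction that the gen2seg code turns into instance masks.
pred = model.unpatchify(outputs.logits)
print(pred.shape)  # e.g. torch.Size([1, 3, 224, 224])
```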
arguments.txt ADDED
@@ -0,0 +1,31 @@
+ modality: instance
+ noise_type: None
+ lr_exp_warmup_steps: 100
+ lr_total_iter_length: 40000
+ pretrained_model_name_or_path: facebook/vit-mae-huge
+ revision: None
+ variant: None
+ output_dir: model-finetuned/mae_full_e2e_ft_mean_mixed_sqrtsep_meansep_norm
+ seed: 500
+ train_batch_size: 2
+ num_train_epochs: 15
+ max_train_steps: 30000
+ gradient_accumulation_steps: 4
+ gradient_checkpointing: True
+ learning_rate: 6e-05
+ dataloader_num_workers: 0
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ adam_weight_decay: 0.01
+ adam_epsilon: 1e-08
+ max_grad_norm: 1.0
+ logging_dir: logs
+ mixed_precision: no
+ report_to: tensorboard
+ local_rank: 0
+ checkpointing_steps: 10000
+ checkpoints_total_limit: None
+ resume_from_checkpoint: None
+ enable_xformers_memory_efficient_attention: True
+ tracker_project_name: e2e-ft-diffusion
+ random_state_file: /nfs_share3/om/diffusion-e2e-ft/model-finetuned/stable_diffusion_e2e_ft_instance_10k/checkpoint-10000/random_states_0.pkl
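A couple of quantities implied by these arguments: the effective batch size is train_batch_size × gradient_accumulation_steps = 2 × 4 = 8, and training runs for at most max_train_steps = 30000 optimizer steps. The sketch below only shows how the listed Adam settings map onto a standard PyTorch `AdamW` optimizer with gradient clipping; it is not the authors' training loop (that lives in the gen2seg / diffusion-e2e-ft code), and `model` here is a stand-in module.

```python
# Sketch: the Adam hyperparameters from arguments.txt expressed as a standard
# PyTorch AdamW optimizer. `model` is a placeholder, not the MAE-H network.
import torch

model = torch.nn.Linear(8, 8)  # stand-in for the fine-tuned ViT-MAE model

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=6e-05,             # learning_rate
    betas=(0.9, 0.999),   # adam_beta1, adam_beta2
    weight_decay=0.01,    # adam_weight_decay
    eps=1e-08,            # adam_epsilon
)

# max_grad_norm: 1.0 corresponds to clipping gradients each step, e.g.
# torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

effective_batch_size = 2 * 4  # train_batch_size x gradient_accumulation_steps
print(effective_batch_size)   # 8
```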
config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "facebook/vit-mae-huge",
+   "architectures": [
+     "ViTMAEForPreTraining"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "decoder_hidden_size": 512,
+   "decoder_intermediate_size": 2048,
+   "decoder_num_attention_heads": 16,
+   "decoder_num_hidden_layers": 8,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 1280,
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 5120,
+   "layer_norm_eps": 1e-12,
+   "mask_ratio": 0.0,
+   "model_type": "vit_mae",
+   "norm_pix_loss": false,
+   "num_attention_heads": 16,
+   "num_channels": 3,
+   "num_hidden_layers": 32,
+   "patch_size": 14,
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.47.0"
+ }
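As a quick sanity check of what this config implies (assuming the standard `ViTMAEConfig` semantics in `transformers`): a 224×224 input split into 14×14 patches yields 256 patch tokens, and `mask_ratio: 0.0` means none of them are masked, so the decoder reconstructs all patches. A minimal sketch:

```python
# Sketch: derived quantities from the config above, using the standard
# transformers ViTMAEConfig (values copied from config.json in this commit).
from transformers import ViTMAEConfig

config = ViTMAEConfig(
    hidden_size=1280,
    num_hidden_layers=32,
    num_attention_heads=16,
    intermediate_size=5120,
    image_size=224,
    patch_size=14,
    mask_ratio=0.0,
)

num_patches = (config.image_size // config.patch_size) ** 2
print(num_patches)        # 256 patch tokens per image
print(config.mask_ratio)  # 0.0 -> no patches are masked at inference
```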
mae-teaser.png ADDED

Git LFS Details

  • SHA256: c34eb1aee9efaf50004cddc489d709ac1edac5c7d88ecda3ad88de1746950ad2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.06 MB
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8976428a93ecf9dbd823764be3ffab46dbc3bf0356d672adbbf41d65ab23a23e
+ size 2628376456
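The three lines above are only the Git LFS pointer; the actual weights file is roughly 2.6 GB. A minimal sketch for fetching and inspecting it, assuming the hub repo id is `reachomk/gen2seg-mae-h`:

```python
# Sketch: download the real weights file from the Hub and inspect it with the
# safetensors library. The repo id "reachomk/gen2seg-mae-h" is an assumption
# based on the linked Space; adjust it if the model lives elsewhere.
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

path = hf_hub_download(repo_id="reachomk/gen2seg-mae-h", filename="model.safetensors")
state_dict = load_file(path)

total_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {total_params / 1e6:.1f}M parameters")
```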