alexnasa committed on
Commit cf92dec · verified · 1 Parent(s): f90ed46

Upload 66 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the remaining files.
Files changed (50)
  1. .gitattributes +8 -0
  2. .gitignore +21 -0
  3. LICENSE +407 -0
  4. assets/flame_mirror_index.npy +3 -0
  5. assets/flame_uv_coords.npy +3 -0
  6. assets/flame_vertex_weights.npy +3 -0
  7. assets/head_template.obj +0 -0
  8. assets/head_template_color.obj +0 -0
  9. assets/head_template_color_tex.png +3 -0
  10. assets/test_rigid.ply +3 -0
  11. assets/uv_mask_eyes.png +0 -0
  12. assets/uv_valid_verty.npy +3 -0
  13. assets/uv_valid_verty_noEyes.npy +3 -0
  14. assets/uv_valid_verty_noEyes_debug.npy +3 -0
  15. assets/uv_valid_verty_noEyes_noEyeRegion_debug_wEars.npy +3 -0
  16. bin/release.sh +21 -0
  17. configs/base.yaml +234 -0
  18. configs/tracking.yaml +110 -0
  19. environment.yml +76 -0
  20. example_videos/ex1.mp4 +3 -0
  21. example_videos/ex2.mp4 +3 -0
  22. example_videos/ex3.mp4 +3 -0
  23. example_videos/ex4.mp4 +3 -0
  24. example_videos/ex5.mp4 +3 -0
  25. install_preprocessing_pipeline.sh +42 -0
  26. media/banner.gif +3 -0
  27. pyproject.toml +35 -0
  28. requirements.txt +26 -0
  29. scripts/.gitkeep +0 -0
  30. scripts/network_inference.py +229 -0
  31. scripts/run_cropping.py +107 -0
  32. scripts/run_facer_segmentation.py +221 -0
  33. scripts/run_preprocessing.py +23 -0
  34. scripts/track.py +27 -0
  35. scripts/viz_head_centric_cameras.py +103 -0
  36. setup.py +7 -0
  37. src/__init__.py +0 -0
  38. src/pixel3dmm/__init__.py +0 -0
  39. src/pixel3dmm/env_paths.py +34 -0
  40. src/pixel3dmm/lightning/p3dmm_network.py +0 -0
  41. src/pixel3dmm/lightning/p3dmm_system.py +491 -0
  42. src/pixel3dmm/lightning/utils.py +119 -0
  43. src/pixel3dmm/preprocessing/__init__.py +0 -0
  44. src/pixel3dmm/preprocessing/pipnet_utils.py +348 -0
  45. src/pixel3dmm/preprocessing/replacement_code/__init__.py +0 -0
  46. src/pixel3dmm/preprocessing/replacement_code/facer_transform.py +397 -0
  47. src/pixel3dmm/preprocessing/replacement_code/farl.py +94 -0
  48. src/pixel3dmm/preprocessing/replacement_code/mica.py +120 -0
  49. src/pixel3dmm/preprocessing/replacement_code/mica_demo.py +188 -0
  50. src/pixel3dmm/preprocessing/replacement_code/pipnet_demo.py +401 -0
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/head_template_color_tex.png filter=lfs diff=lfs merge=lfs -text
+ assets/test_rigid.ply filter=lfs diff=lfs merge=lfs -text
+ example_videos/ex1.mp4 filter=lfs diff=lfs merge=lfs -text
+ example_videos/ex2.mp4 filter=lfs diff=lfs merge=lfs -text
+ example_videos/ex3.mp4 filter=lfs diff=lfs merge=lfs -text
+ example_videos/ex4.mp4 filter=lfs diff=lfs merge=lfs -text
+ example_videos/ex5.mp4 filter=lfs diff=lfs merge=lfs -text
+ media/banner.gif filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,21 @@
+ # Python cache
+ __pycache__
+ *.py[cod]
+
+ # PyCharm/Jupyter
+ .idea
+ .ipynb_checkpoints
+
+ # build artifacts
+ *.egg-info
+ /docs/build/
+ dist
+
+
+ /src/pixel3dmm/preprocessing/facer/
+ /src/pixel3dmm/preprocessing/MICA/
+ /src/pixel3dmm/preprocessing/PIPNet/
+ /pretrained_weights/
+ /assets/
+
+ *.mp4
LICENSE ADDED
@@ -0,0 +1,407 @@
1
+ Attribution-NonCommercial 4.0 International
2
+
3
+ =======================================================================
4
+
5
+ Creative Commons Corporation ("Creative Commons") is not a law firm and
6
+ does not provide legal services or legal advice. Distribution of
7
+ Creative Commons public licenses does not create a lawyer-client or
8
+ other relationship. Creative Commons makes its licenses and related
9
+ information available on an "as-is" basis. Creative Commons gives no
10
+ warranties regarding its licenses, any material licensed under their
11
+ terms and conditions, or any related information. Creative Commons
12
+ disclaims all liability for damages resulting from their use to the
13
+ fullest extent possible.
14
+
15
+ Using Creative Commons Public Licenses
16
+
17
+ Creative Commons public licenses provide a standard set of terms and
18
+ conditions that creators and other rights holders may use to share
19
+ original works of authorship and other material subject to copyright
20
+ and certain other rights specified in the public license below. The
21
+ following considerations are for informational purposes only, are not
22
+ exhaustive, and do not form part of our licenses.
23
+
24
+ Considerations for licensors: Our public licenses are
25
+ intended for use by those authorized to give the public
26
+ permission to use material in ways otherwise restricted by
27
+ copyright and certain other rights. Our licenses are
28
+ irrevocable. Licensors should read and understand the terms
29
+ and conditions of the license they choose before applying it.
30
+ Licensors should also secure all rights necessary before
31
+ applying our licenses so that the public can reuse the
32
+ material as expected. Licensors should clearly mark any
33
+ material not subject to the license. This includes other CC-
34
+ licensed material, or material used under an exception or
35
+ limitation to copyright. More considerations for licensors:
36
+ wiki.creativecommons.org/Considerations_for_licensors
37
+
38
+ Considerations for the public: By using one of our public
39
+ licenses, a licensor grants the public permission to use the
40
+ licensed material under specified terms and conditions. If
41
+ the licensor's permission is not necessary for any reason--for
42
+ example, because of any applicable exception or limitation to
43
+ copyright--then that use is not regulated by the license. Our
44
+ licenses grant only permissions under copyright and certain
45
+ other rights that a licensor has authority to grant. Use of
46
+ the licensed material may still be restricted for other
47
+ reasons, including because others have copyright or other
48
+ rights in the material. A licensor may make special requests,
49
+ such as asking that all changes be marked or described.
50
+ Although not required by our licenses, you are encouraged to
51
+ respect those requests where reasonable. More considerations
52
+ for the public:
53
+ wiki.creativecommons.org/Considerations_for_licensees
54
+
55
+ =======================================================================
56
+
57
+ Creative Commons Attribution-NonCommercial 4.0 International Public
58
+ License
59
+
60
+ By exercising the Licensed Rights (defined below), You accept and agree
61
+ to be bound by the terms and conditions of this Creative Commons
62
+ Attribution-NonCommercial 4.0 International Public License ("Public
63
+ License"). To the extent this Public License may be interpreted as a
64
+ contract, You are granted the Licensed Rights in consideration of Your
65
+ acceptance of these terms and conditions, and the Licensor grants You
66
+ such rights in consideration of benefits the Licensor receives from
67
+ making the Licensed Material available under these terms and
68
+ conditions.
69
+
70
+
71
+ Section 1 -- Definitions.
72
+
73
+ a. Adapted Material means material subject to Copyright and Similar
74
+ Rights that is derived from or based upon the Licensed Material
75
+ and in which the Licensed Material is translated, altered,
76
+ arranged, transformed, or otherwise modified in a manner requiring
77
+ permission under the Copyright and Similar Rights held by the
78
+ Licensor. For purposes of this Public License, where the Licensed
79
+ Material is a musical work, performance, or sound recording,
80
+ Adapted Material is always produced where the Licensed Material is
81
+ synched in timed relation with a moving image.
82
+
83
+ b. Adapter's License means the license You apply to Your Copyright
84
+ and Similar Rights in Your contributions to Adapted Material in
85
+ accordance with the terms and conditions of this Public License.
86
+
87
+ c. Copyright and Similar Rights means copyright and/or similar rights
88
+ closely related to copyright including, without limitation,
89
+ performance, broadcast, sound recording, and Sui Generis Database
90
+ Rights, without regard to how the rights are labeled or
91
+ categorized. For purposes of this Public License, the rights
92
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
93
+ Rights.
94
+ d. Effective Technological Measures means those measures that, in the
95
+ absence of proper authority, may not be circumvented under laws
96
+ fulfilling obligations under Article 11 of the WIPO Copyright
97
+ Treaty adopted on December 20, 1996, and/or similar international
98
+ agreements.
99
+
100
+ e. Exceptions and Limitations means fair use, fair dealing, and/or
101
+ any other exception or limitation to Copyright and Similar Rights
102
+ that applies to Your use of the Licensed Material.
103
+
104
+ f. Licensed Material means the artistic or literary work, database,
105
+ or other material to which the Licensor applied this Public
106
+ License.
107
+
108
+ g. Licensed Rights means the rights granted to You subject to the
109
+ terms and conditions of this Public License, which are limited to
110
+ all Copyright and Similar Rights that apply to Your use of the
111
+ Licensed Material and that the Licensor has authority to license.
112
+
113
+ h. Licensor means the individual(s) or entity(ies) granting rights
114
+ under this Public License.
115
+
116
+ i. NonCommercial means not primarily intended for or directed towards
117
+ commercial advantage or monetary compensation. For purposes of
118
+ this Public License, the exchange of the Licensed Material for
119
+ other material subject to Copyright and Similar Rights by digital
120
+ file-sharing or similar means is NonCommercial provided there is
121
+ no payment of monetary compensation in connection with the
122
+ exchange.
123
+
124
+ j. Share means to provide material to the public by any means or
125
+ process that requires permission under the Licensed Rights, such
126
+ as reproduction, public display, public performance, distribution,
127
+ dissemination, communication, or importation, and to make material
128
+ available to the public including in ways that members of the
129
+ public may access the material from a place and at a time
130
+ individually chosen by them.
131
+
132
+ k. Sui Generis Database Rights means rights other than copyright
133
+ resulting from Directive 96/9/EC of the European Parliament and of
134
+ the Council of 11 March 1996 on the legal protection of databases,
135
+ as amended and/or succeeded, as well as other essentially
136
+ equivalent rights anywhere in the world.
137
+
138
+ l. You means the individual or entity exercising the Licensed Rights
139
+ under this Public License. Your has a corresponding meaning.
140
+
141
+
142
+ Section 2 -- Scope.
143
+
144
+ a. License grant.
145
+
146
+ 1. Subject to the terms and conditions of this Public License,
147
+ the Licensor hereby grants You a worldwide, royalty-free,
148
+ non-sublicensable, non-exclusive, irrevocable license to
149
+ exercise the Licensed Rights in the Licensed Material to:
150
+
151
+ a. reproduce and Share the Licensed Material, in whole or
152
+ in part, for NonCommercial purposes only; and
153
+
154
+ b. produce, reproduce, and Share Adapted Material for
155
+ NonCommercial purposes only.
156
+
157
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
158
+ Exceptions and Limitations apply to Your use, this Public
159
+ License does not apply, and You do not need to comply with
160
+ its terms and conditions.
161
+
162
+ 3. Term. The term of this Public License is specified in Section
163
+ 6(a).
164
+
165
+ 4. Media and formats; technical modifications allowed. The
166
+ Licensor authorizes You to exercise the Licensed Rights in
167
+ all media and formats whether now known or hereafter created,
168
+ and to make technical modifications necessary to do so. The
169
+ Licensor waives and/or agrees not to assert any right or
170
+ authority to forbid You from making technical modifications
171
+ necessary to exercise the Licensed Rights, including
172
+ technical modifications necessary to circumvent Effective
173
+ Technological Measures. For purposes of this Public License,
174
+ simply making modifications authorized by this Section 2(a)
175
+ (4) never produces Adapted Material.
176
+
177
+ 5. Downstream recipients.
178
+
179
+ a. Offer from the Licensor -- Licensed Material. Every
180
+ recipient of the Licensed Material automatically
181
+ receives an offer from the Licensor to exercise the
182
+ Licensed Rights under the terms and conditions of this
183
+ Public License.
184
+
185
+ b. No downstream restrictions. You may not offer or impose
186
+ any additional or different terms or conditions on, or
187
+ apply any Effective Technological Measures to, the
188
+ Licensed Material if doing so restricts exercise of the
189
+ Licensed Rights by any recipient of the Licensed
190
+ Material.
191
+
192
+ 6. No endorsement. Nothing in this Public License constitutes or
193
+ may be construed as permission to assert or imply that You
194
+ are, or that Your use of the Licensed Material is, connected
195
+ with, or sponsored, endorsed, or granted official status by,
196
+ the Licensor or others designated to receive attribution as
197
+ provided in Section 3(a)(1)(A)(i).
198
+
199
+ b. Other rights.
200
+
201
+ 1. Moral rights, such as the right of integrity, are not
202
+ licensed under this Public License, nor are publicity,
203
+ privacy, and/or other similar personality rights; however, to
204
+ the extent possible, the Licensor waives and/or agrees not to
205
+ assert any such rights held by the Licensor to the limited
206
+ extent necessary to allow You to exercise the Licensed
207
+ Rights, but not otherwise.
208
+
209
+ 2. Patent and trademark rights are not licensed under this
210
+ Public License.
211
+
212
+ 3. To the extent possible, the Licensor waives any right to
213
+ collect royalties from You for the exercise of the Licensed
214
+ Rights, whether directly or through a collecting society
215
+ under any voluntary or waivable statutory or compulsory
216
+ licensing scheme. In all other cases the Licensor expressly
217
+ reserves any right to collect such royalties, including when
218
+ the Licensed Material is used other than for NonCommercial
219
+ purposes.
220
+
221
+
222
+ Section 3 -- License Conditions.
223
+
224
+ Your exercise of the Licensed Rights is expressly made subject to the
225
+ following conditions.
226
+
227
+ a. Attribution.
228
+
229
+ 1. If You Share the Licensed Material (including in modified
230
+ form), You must:
231
+
232
+ a. retain the following if it is supplied by the Licensor
233
+ with the Licensed Material:
234
+
235
+ i. identification of the creator(s) of the Licensed
236
+ Material and any others designated to receive
237
+ attribution, in any reasonable manner requested by
238
+ the Licensor (including by pseudonym if
239
+ designated);
240
+
241
+ ii. a copyright notice;
242
+
243
+ iii. a notice that refers to this Public License;
244
+
245
+ iv. a notice that refers to the disclaimer of
246
+ warranties;
247
+
248
+ v. a URI or hyperlink to the Licensed Material to the
249
+ extent reasonably practicable;
250
+
251
+ b. indicate if You modified the Licensed Material and
252
+ retain an indication of any previous modifications; and
253
+
254
+ c. indicate the Licensed Material is licensed under this
255
+ Public License, and include the text of, or the URI or
256
+ hyperlink to, this Public License.
257
+
258
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
259
+ reasonable manner based on the medium, means, and context in
260
+ which You Share the Licensed Material. For example, it may be
261
+ reasonable to satisfy the conditions by providing a URI or
262
+ hyperlink to a resource that includes the required
263
+ information.
264
+
265
+ 3. If requested by the Licensor, You must remove any of the
266
+ information required by Section 3(a)(1)(A) to the extent
267
+ reasonably practicable.
268
+
269
+ 4. If You Share Adapted Material You produce, the Adapter's
270
+ License You apply must not prevent recipients of the Adapted
271
+ Material from complying with this Public License.
272
+
273
+
274
+ Section 4 -- Sui Generis Database Rights.
275
+
276
+ Where the Licensed Rights include Sui Generis Database Rights that
277
+ apply to Your use of the Licensed Material:
278
+
279
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
280
+ to extract, reuse, reproduce, and Share all or a substantial
281
+ portion of the contents of the database for NonCommercial purposes
282
+ only;
283
+
284
+ b. if You include all or a substantial portion of the database
285
+ contents in a database in which You have Sui Generis Database
286
+ Rights, then the database in which You have Sui Generis Database
287
+ Rights (but not its individual contents) is Adapted Material; and
288
+
289
+ c. You must comply with the conditions in Section 3(a) if You Share
290
+ all or a substantial portion of the contents of the database.
291
+
292
+ For the avoidance of doubt, this Section 4 supplements and does not
293
+ replace Your obligations under this Public License where the Licensed
294
+ Rights include other Copyright and Similar Rights.
295
+
296
+
297
+ Section 5 -- Disclaimer of Warranties and Limitation of Liability.
298
+
299
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
300
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
301
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
302
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
303
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
304
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
305
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
306
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
307
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
308
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
309
+
310
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
311
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
312
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
313
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
314
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
315
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
316
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
317
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
318
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
319
+
320
+ c. The disclaimer of warranties and limitation of liability provided
321
+ above shall be interpreted in a manner that, to the extent
322
+ possible, most closely approximates an absolute disclaimer and
323
+ waiver of all liability.
324
+
325
+
326
+ Section 6 -- Term and Termination.
327
+
328
+ a. This Public License applies for the term of the Copyright and
329
+ Similar Rights licensed here. However, if You fail to comply with
330
+ this Public License, then Your rights under this Public License
331
+ terminate automatically.
332
+
333
+ b. Where Your right to use the Licensed Material has terminated under
334
+ Section 6(a), it reinstates:
335
+
336
+ 1. automatically as of the date the violation is cured, provided
337
+ it is cured within 30 days of Your discovery of the
338
+ violation; or
339
+
340
+ 2. upon express reinstatement by the Licensor.
341
+
342
+ For the avoidance of doubt, this Section 6(b) does not affect any
343
+ right the Licensor may have to seek remedies for Your violations
344
+ of this Public License.
345
+
346
+ c. For the avoidance of doubt, the Licensor may also offer the
347
+ Licensed Material under separate terms or conditions or stop
348
+ distributing the Licensed Material at any time; however, doing so
349
+ will not terminate this Public License.
350
+
351
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
352
+ License.
353
+
354
+
355
+ Section 7 -- Other Terms and Conditions.
356
+
357
+ a. The Licensor shall not be bound by any additional or different
358
+ terms or conditions communicated by You unless expressly agreed.
359
+
360
+ b. Any arrangements, understandings, or agreements regarding the
361
+ Licensed Material not stated herein are separate from and
362
+ independent of the terms and conditions of this Public License.
363
+
364
+
365
+ Section 8 -- Interpretation.
366
+
367
+ a. For the avoidance of doubt, this Public License does not, and
368
+ shall not be interpreted to, reduce, limit, restrict, or impose
369
+ conditions on any use of the Licensed Material that could lawfully
370
+ be made without permission under this Public License.
371
+
372
+ b. To the extent possible, if any provision of this Public License is
373
+ deemed unenforceable, it shall be automatically reformed to the
374
+ minimum extent necessary to make it enforceable. If the provision
375
+ cannot be reformed, it shall be severed from this Public License
376
+ without affecting the enforceability of the remaining terms and
377
+ conditions.
378
+
379
+ c. No term or condition of this Public License will be waived and no
380
+ failure to comply consented to unless expressly agreed to by the
381
+ Licensor.
382
+
383
+ d. Nothing in this Public License constitutes or may be interpreted
384
+ as a limitation upon, or waiver of, any privileges and immunities
385
+ that apply to the Licensor or You, including from the legal
386
+ processes of any jurisdiction or authority.
387
+
388
+ =======================================================================
389
+
390
+ Creative Commons is not a party to its public
391
+ licenses. Notwithstanding, Creative Commons may elect to apply one of
392
+ its public licenses to material it publishes and in those instances
393
+ will be considered the "Licensor." The text of the Creative Commons
394
+ public licenses is dedicated to the public domain under the CC0 Public
395
+ Domain Dedication. Except for the limited purpose of indicating that
396
+ material is shared under a Creative Commons public license or as
397
+ otherwise permitted by the Creative Commons policies published at
398
+ creativecommons.org/policies, Creative Commons does not authorize the
399
+ use of the trademark "Creative Commons" or any other trademark or logo
400
+ of Creative Commons without its prior written consent including,
401
+ without limitation, in connection with any unauthorized modifications
402
+ to any of its public licenses or any other arrangements,
403
+ understandings, or agreements concerning use of licensed material. For
404
+ the avoidance of doubt, this paragraph does not form part of the
405
+ public licenses.
406
+
407
+ Creative Commons may be contacted at creativecommons.org.
assets/flame_mirror_index.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:374636351ce484f36a3524af05cb00ed5a4f4a38ab759ea9bffce6e3fb761153
+ size 40312
assets/flame_uv_coords.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:194adb9a1c1bc76a85c4e8c446d5dfaca93f0f42f157eb82659457c057c71527
+ size 80496
assets/flame_vertex_weights.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7e860e97be4209f3ee7351a7359cb01b9870d56fc93be3b76cdd7e657eb9346d
+ size 60404
assets/head_template.obj ADDED
The diff for this file is too large to render. See raw diff
 
assets/head_template_color.obj ADDED
The diff for this file is too large to render. See raw diff
 
assets/head_template_color_tex.png ADDED

Git LFS Details

  • SHA256: da8bb9232572dbc286aae59cc56be1a8ac4d5c97b58c0275eda8b7b63e2828bf
  • Pointer size: 131 Bytes
  • Size of remote file: 192 kB
assets/test_rigid.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:427556b014087cf1aea6bbcddec815dbcc5c55a3334de1786a362068e716cfe6
+ size 210354
assets/uv_mask_eyes.png ADDED
assets/uv_valid_verty.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84f36cdf5ebe1cb4db88f3126ef82c5a59a48ca45ea396ffa23e4b50ac0ce06b
+ size 14424
assets/uv_valid_verty_noEyes.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7428896b5b1d4c9db6c0c1e2a4e98c412e95cb62243e5463311cb714c8f3820c
+ size 9832
assets/uv_valid_verty_noEyes_debug.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bea26d06186de1f51d0d9687e4eb723a816911fe224c46ff328c0664f7e68bd5
+ size 15096
assets/uv_valid_verty_noEyes_noEyeRegion_debug_wEars.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:379a44c27fcc3e05818588f88073d6dd53a48c3ef11bb56afe7d03657f256fb6
+ size 19912
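The .npy assets above are committed as Git LFS pointers (only an oid and a size); the actual arrays arrive with `git lfs pull`. Below is a minimal Python sketch of inspecting them afterwards — the file names come from this commit, but shapes and dtypes are not guaranteed here.

import numpy as np

# Print basic metadata for the FLAME-related assets added above (assumes `git lfs pull` has run).
for name in [
    "assets/flame_mirror_index.npy",
    "assets/flame_uv_coords.npy",
    "assets/flame_vertex_weights.npy",
    "assets/uv_valid_verty.npy",
]:
    arr = np.load(name)
    print(name, arr.shape, arr.dtype)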
bin/release.sh ADDED
@@ -0,0 +1,21 @@
+ #!/bin/bash
+
+ while [[ "$#" -gt 0 ]]; do
+     case $1 in
+         -t|--test) test=1 ;;
+         *) echo "Unknown parameter: $1"; exit 1 ;;
+     esac
+     shift
+ done
+
+ if [[ $test ]]; then
+     twine_params="--repository testpypi"
+ else
+     twine_params=""
+ fi
+
+ cd "${0%/*}/.."
+ rm -r dist/*
+ python -m build
+ twine upload $twine_params dist/*
+ # Username: tobias.kirschstein
configs/base.yaml ADDED
@@ -0,0 +1,234 @@
1
+ gpu_id: [0] #[4,5,6,7]
2
+
3
+ exp_name: LaRa/release-test-head-cluster
4
+
5
+ n_views: 2
6
+
7
+ reconstruction_folder: recs
8
+
9
+ flame_folder : /home/giebenhain/face-tracking
10
+ flame_folder_assets : /home/giebenhain/face-tracking/flame/
11
+ flame_base_mesh: /home/giebenhain/PycharmProjects/non-rigid-registration/template/test_rigid.ply
12
+ exp_tag : _
13
+
14
+ viz_uv_mesh : False
15
+
16
+ model:
17
+ attn_drop : 0.2
18
+ model_type: 'flame_params'
19
+ prediction_type: 'normals'
20
+ network_type: 'transformer'
21
+ encoder_backbone: 'vit_small_patch16_224.dino' #'vit_base_patch16_224.dino' #'vit_base_patch16_224.dino' # ['vit_small_patch16_224.dino','vit_base_patch16_224.dino']
22
+
23
+ n_groups: [16] # n_groups for local attention
24
+ n_offset_groups: 32 # offset radius of 1/n_offset_groups of the scene size
25
+
26
+ K: 2 # primitives per-voxel
27
+ sh_degree: 1 # view dependent color
28
+
29
+ num_layers: 6 #6 #12
30
+ num_heads: 8 #16
31
+
32
+ view_embed_dim: 16 #32
33
+ embedding_dim: 256 #128 #256
34
+
35
+ vol_feat_reso: 16
36
+ vol_embedding_reso: 32
37
+
38
+ vol_embedding_out_dim: 40 #80
39
+
40
+ ckpt_path: null # specify a ckpt path if you want to continue training
41
+
42
+ flame_dim: 101
43
+
44
+ finetune_backbone: False
45
+
46
+ feature_map_type: DINO
47
+
48
+ pred_conf: False
49
+
50
+ pred_disentangled : False
51
+
52
+ nocs : True
53
+
54
+
55
+
56
+ use_pos_enc : False
57
+
58
+ conv_dec : True
59
+
60
+ use_plucker : False
61
+ use_uv_enc : True
62
+
63
+ n_facial_components : 0
64
+
65
+ render_super : False
66
+
67
+ flame_shape_dim : 300
68
+ flame_expr_dim : 100
69
+
70
+ prior_input : False
71
+ use_neutral : True
72
+
73
+ reg_inner : True
74
+ n_inner_steps : 0 #20
75
+
76
+ corresp_align : False
77
+
78
+ pred_dim : 4
79
+
80
+ outer_vertex_mask : False
81
+
82
+ downsample_inps : False
83
+
84
+ flame2020 : True
85
+ use_mica : False
86
+
87
+ branched : True
88
+
89
+
90
+
91
+ train_dataset:
92
+ dataset_name: gobjeverse
93
+ data_root: /mnt/rohan/cluster/andram/sgiebenhain/objaverse_imposter8_cropped_prepped_00.hdf5 #cluster/andram/sgiebenhain/objaverse_imposter3_prepped_00.hdf5 #/mnt/hdd/dataset/gobjaverse/gobjaverse_part_01.h5 #/mnt/rohan /home/giebenhain/proj4/objaverse_imposter2_prepped_00.hdf5 #dataset/gobjaverse/gobjaverse.h5
94
+
95
+ split: train
96
+ img_size: [512,512] # image resolution
97
+ n_group: ${n_views} # image resolution
98
+ n_scenes: 3000000
99
+
100
+ itl:
101
+ lr_expr: 0.1
102
+ lr_id: 0.05
103
+ lr_cam_pos: 0.0001 #0.005
104
+ lr_cam_rot: 0.001 #0.01
105
+ lr_fl: 0.01 #0.03
106
+ lr_pp: 0.00001 #0.002
107
+ lr_jaw : 0.0001
108
+
109
+ lr_expr_outer : 0.00001
110
+ lr_shape_outer : 0.00001
111
+ lr_cam_pos_outer : 0.000001
112
+ lr_cam_rot_outer : 0.000001
113
+ lr_fl_outer : 0.000001
114
+ lr_pp_outer : 0.000001
115
+
116
+ noise_strenght : 0.5
117
+
118
+ ffwd_init : True
119
+ ffwd_init_flame : True
120
+ ffwd_flame_weight : 0.01
121
+
122
+ scale_reg_id : 10
123
+ scale_reg_ex : 10
124
+ scale_confidence : 10
125
+
126
+ n_steps_cam : 0
127
+
128
+ use_uv : True
129
+ use_n : True
130
+ use_ncan : False
131
+ use_disp : False
132
+
133
+ reg_conf: 0.01
134
+ totvar_conf : 1.0
135
+
136
+ uv_loss_mult : 3
137
+ n_loss_mult : 0.0
138
+
139
+ const_conf : False
140
+ uv_l2 : False
141
+ n_mask_new : False
142
+
143
+ reg_shape: 0.01
144
+ reg_shape_ffwd: 0.01
145
+ reg_expr: 0.01
146
+ reg_expr_ffwd: 0.01
147
+
148
+ rnd_warmup : False
149
+ use_outer_normals : True
150
+ normal_inp : True
151
+ rnd_n_inner : False
152
+ n_inner_min : 20
153
+ n_inner_max : 100
154
+ fov_mult : 1.0
155
+
156
+ outer_l2 : True
157
+
158
+ pred_face_region : False
159
+ sup_back_more : True
160
+
161
+
162
+ data:
163
+ load_normal: False
164
+ load_flame: False
165
+ load_uv : False
166
+ load_pos_map : False
167
+ load_depth : False
168
+ load_verts : False
169
+ load_arcface : False
170
+ load_albedo : False
171
+ load_nocs : False
172
+ mirror_aug : False
173
+ disable_aug: False
174
+ disable_color_aug: False
175
+ use_nphm : True
176
+ use_ava : True
177
+ use_facescape : True
178
+ use_celeba : False
179
+ use_lyhm : True
180
+ use_stirling : True
181
+ use_video : False
182
+ use_cafca : True
183
+ use_now : False
184
+ use_mimicme : True
185
+
186
+ add_occ : False
187
+
188
+ use_p3dmm : True
189
+
190
+
191
+ load_consist : False
192
+ load_prior : False
193
+
194
+ overfit : False
195
+
196
+ more_verts: False
197
+
198
+ load_facer: False
199
+
200
+
201
+
202
+ test_dataset:
203
+ dataset_name: gobjeverse
204
+ data_root: /mnt/rohan/cluster/andram/sgiebenhain/objaverse_imposter8_cropped_prepped_00.hdf5 #cluster/andram/sgiebenhain/objaverse_imposter3_prepped_00.hdf5 #/mnt/hdd/dataset/gobjaverse/gobjaverse_part_01.h5 #/mnt/rohan /home/giebenhain/proj4/objaverse_imposter2_prepped_00.hdf5 #dataset/gobjaverse/gobjaverse.h5
205
+
206
+ split: test
207
+ img_size: [512,512]
208
+ n_group: ${n_views}
209
+ n_scenes: 3000000
210
+
211
+ train:
212
+ batch_size: 8 #3
213
+ lr: 4e-4 #1e-2 #4e-4
214
+ lr_backbone: 1e-5 #4e-4
215
+ beta1: 0.9
216
+ beta2: 0.95 #0.95
217
+ weight_decay: 0.05
218
+ warmup_iters: 200
219
+ n_epoch: 3000 #3000
220
+ limit_train_batches: 0.05 #0.2 #1.0 #0.1 #1.0 #0.2
221
+ limit_val_batches: 0.02 #0.05 #1 #0.02
222
+ check_val_every_n_epoch: 1
223
+ start_fine: 5000
224
+ use_rand_views: False
225
+ duster_loss: False
226
+ start_2d_vertex_loss : 500 #2500
227
+ start_normal_render_loss : 1000 #5000
228
+
229
+ test:
230
+ batch_size: 8 #3
231
+
232
+ logger:
233
+ name: wandb #tensorboard
234
+ dir: logs/${exp_name}
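configs/base.yaml is consumed through OmegaConf; scripts/network_inference.py further down in this diff loads it and merges command-line overrides on top. A minimal sketch of that pattern, assuming it is run from the repository root:

from omegaconf import OmegaConf

# Load the base config and let CLI overrides such as `model.prediction_type=normals` win,
# mirroring the __main__ block of scripts/network_inference.py.
base_conf = OmegaConf.load("configs/base.yaml")
cli_conf = OmegaConf.from_cli()
cfg = OmegaConf.merge(base_conf, cli_conf)
print(cfg.model.prediction_type, cfg.train.lr)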
configs/tracking.yaml ADDED
@@ -0,0 +1,110 @@
1
+ config_name : test
2
+
3
+ batch_size : 16
4
+ num_views : 1
5
+ size : 256
6
+ image_size : [256, 256] # use this instead of hardcoding a bunch of 512s and 256s
7
+
8
+ data_folder : none
9
+ p3dmm_folder : none
10
+
11
+ extra_cam_steps : False
12
+ big_normal_mask : False
13
+
14
+ start_frame : 0
15
+
16
+ num_shape_params : 300
17
+ num_exp_params : 100
18
+ tex_params : 140
19
+ iters : 200 #800 #400
20
+
21
+ no_lm : False
22
+ use_eyebrows : False
23
+ use_mouth_lmk : True
24
+ no_pho : True
25
+ no_sh : True
26
+ disable_edge : False
27
+
28
+ keyframes : []
29
+
30
+ ignore_mica : False
31
+ flame2023 : False
32
+
33
+ uv_map_super : 2000.0 #500.0 #100 #2001.0 #5000.0 #2000.0
34
+ normal_super : 1000.0 #202.0
35
+ normal_super_can : 0.0
36
+ sil_super : 500
37
+
38
+
39
+ uv_loss:
40
+ stricter_uv_mask : False
41
+ delta_uv : 0.00005 #0.00005 #0.0005 #0.00005
42
+ delta_uv_fine : 0.00005 #0.00005 #0.0005 #0.00005
43
+ dist_uv : 20 #20 #15
44
+ dist_uv_fine : 20 #35 #20 #15
45
+
46
+
47
+ occ_filter : True
48
+
49
+
50
+ lr_id : 0.002 #0.003 #0.006 #0.003
51
+ lr_exp : 0.005 # 0.005 #0.01 #0.01 #0.005
52
+ lr_jaw : 0.005 #0.003
53
+ lr_neck : 0.001 #0.0005
54
+ lr_R : 0.005 #0.005 #0.002 #0.01# 0.0001
55
+ lr_t : 0.001 #0.002 #0.001 #0.0005 #0.0005
56
+ lr_f : 0.1 #0.05 #0.01 #0.001
57
+ lr_pp : 0.00005
58
+
59
+ w_pho : 150
60
+ w_lmks : 3000
61
+ w_lmks_mouth : 1000
62
+ w_lmks_68 : 1000
63
+ w_lmks_lid : 1000
64
+ w_lmks_iris : 1000
65
+ w_lmks_oval : 2000
66
+ w_lmks_star : 0
67
+
68
+ include_neck : True
69
+
70
+ w_shape : 0.2
71
+ w_shape_general : 0.05
72
+ w_exp : 0.05
73
+ w_jaw : 0.01
74
+ w_neck : 0.1
75
+
76
+ n_fine : False
77
+ low_overhead : False
78
+
79
+ delta_n : 0.33
80
+
81
+ global_camera : True
82
+ global_iters : 5000
83
+
84
+ reg_smooth_exp : 50.0
85
+ reg_smooth_eyes : 10.0
86
+ reg_smooth_eyelids : 2.0
87
+ reg_smooth_jaw : 50.0
88
+ reg_smooth_neck : 1000.0
89
+ reg_smooth_R : 2000.0
90
+ reg_smooth_t : 15200.0
91
+ reg_smooth_pp : 420.0
92
+ reg_smooth_fl : 420.0
93
+
94
+ reg_smooth_mult : 1.0
95
+
96
+ uv_l2 : True
97
+ normal_l2 : False
98
+ smooth : True
99
+ normal_mask_ksize : 13
100
+
101
+ early_stopping_delta : 5.0
102
+
103
+ early_exit : False
104
+
105
+ draw_uv_corresp : False
106
+
107
+ save_landmarks : False
108
+
109
+ save_meshes : True
110
+ delete_preprocessing : False
environment.yml ADDED
@@ -0,0 +1,76 @@
1
+ # Note: conda dependencies have only 1 "="
2
+ # pip dependencies have 2 "=="
3
+ # Fuse pip dependencies together under one " - pip" item
4
+ # Otherwise, only some of them are installed, because conda creates a temporary requirements.txt file
5
+ # from only the last "- pip:" section
6
+
7
+ name: p3dmm
8
+
9
+ channels:
10
+ - pytorch
11
+ - conda-forge
12
+ - defaults
13
+
14
+ dependencies:
15
+ - python=3.9
16
+ - pip
17
+ - jupyter
18
+
19
+ # CUDA and PyTorch
20
+ - gcc<12 # Needs to be <12 because nvcc does not like gcc>11
21
+ - gxx
22
+ - torchvision
23
+
24
+ -
25
+ - nvidia/label/cuda-11.8.0::cuda-nvcc # for nvcc
26
+ - nvidia/label/cuda-11.8.0::cuda-cccl
27
+ - nvidia/label/cuda-11.8.0::cuda-cudart
28
+ - nvidia/label/cuda-11.8.0::cuda-cudart-dev # for cuda_runtime.h
29
+ - nvidia/label/cuda-11.8.0::libcusparse
30
+ - nvidia/label/cuda-11.8.0::libcusparse-dev
31
+ - nvidia/label/cuda-11.8.0::libcublas
32
+ - nvidia/label/cuda-11.8.0::libcublas-dev
33
+ - nvidia/label/cuda-11.8.0::libcurand
34
+ - nvidia/label/cuda-11.8.0::libcurand-dev
35
+ - nvidia/label/cuda-11.8.0::libcusolver
36
+ - nvidia/label/cuda-11.8.0::libcusolver-dev
37
+ - pip:
38
+
39
+ - pip:
40
+ - --extra-index-url https://download.pytorch.org/whl/cu118
41
+ - torch==2.7+cu118
42
+ - torchvision==0.22+cu118
43
+ - tyro
44
+ - environs
45
+
46
+ - omegaconf
47
+ - dreifus
48
+ - wandb
49
+ - pytorch_lightning
50
+ - opencv-python
51
+ - tensorboard
52
+ - wandb
53
+ - scikit-image
54
+ - pyvista
55
+ - chumpy
56
+ - h5py
57
+ - einops
58
+ - ninja
59
+ - mediapy
60
+ - face-alignment==1.3.3
61
+ - numpy==1.23
62
+
63
+
64
+ - git+https://github.com/facebookresearch/pytorch3d.git@stable
65
+ - git+https://github.com/NVlabs/nvdiffrast.git
66
+
67
+ # for MICA
68
+ - insightface
69
+ - onnxruntime
70
+ - loguru
71
+ - yacs
72
+
73
+ # facer
74
+ - distinctipy
75
+ - validators
76
+ - timm
example_videos/ex1.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9bcf05f5d3ff2dabaad3ec3562b1ea463bdc2324ffa1cb5875f4468f5341e5f4
+ size 662545
example_videos/ex2.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71f5bc28eb0bc3fb23dfe4079e303c382e1036b25553c12a8dda208b5ebb9a44
+ size 822778
example_videos/ex3.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e5127eb860778a01b0b33ff0a5760f604a29232f1cdd695fdc8499300d607a6
+ size 326767
example_videos/ex4.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8f71ee7d60490725cb463b9da247c2b3d08f9d01a8dbd566726b599cee53199
+ size 375763
example_videos/ex5.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05a0c8807a31740243d9a1e5ae34f3fd4990701202ffae256e33e70e1f5fa5a9
+ size 587737
install_preprocessing_pipeline.sh ADDED
@@ -0,0 +1,42 @@
1
+ #!/bin/bash
2
+
3
+ cd src/pixel3dmm/preprocessing/
4
+
5
+ # facer repository
6
+ git clone [email protected]:FacePerceiver/facer.git
7
+ cd facer
8
+ cp ../replacement_code/farl.py facer/face_parsing/farl.py
9
+ cp ../replacement_code/facer_transform.py facer/transform.py
10
+ pip install -e .
11
+ cd ..
12
+
13
+ # MICA
14
+ git clone [email protected]:Zielon/MICA.git
15
+ cd MICA
16
+ cp ../replacement_code/mica_demo.py demo.py
17
+ cp ../replacement_code/mica.py micalib/models/mica.py
18
+ ./install.sh
19
+ cd ..
20
+
21
+ #TODO: Maybe need to copy these flame weights to trackign/FLAME as well, or ideally adjust some paths instead
22
+
23
+
24
+ # PIPnet
25
+ git clone https://github.com/jhb86253817/PIPNet.git
26
+ cd PIPNet
27
+ cd FaceBoxesV2/utils
28
+ sh make.sh
29
+ cd ../..
30
+ mkdir snapshots
31
+ mkdir snapshots/WFLW/
32
+ mkdir snapshots/WFLW/pip_32_16_60_r18_l2_l1_10_1_nb10/
33
+ gdown --id 1nVkaSbxy3NeqblwMTGvLg4nF49cI_99C -O snapshots/WFLW/pip_32_16_60_r18_l2_l1_10_1_nb10/epoch59.pth
34
+ #mkdir snapshots/WFLW/pip_32_16_60_r101_l2_l1_10_1_nb10/
35
+ #gdown --id 1Jb97z5Z5ca61-6W2RDOK0e2w_RlbeWgS -O snapshots/WFLW/pip_32_16_60_r101_l2_l1_10_1_nb10/epoch59.pth
36
+
37
+
38
+ cd ../../../../
39
+ mkdir pretrained_weights
40
+ cd pretrained_weights
41
+ gdown --id 1SDV_8_qWTe__rX_8e4Fi-BE3aES0YzJY -O ./uv.ckpt
42
+ gdown --id 1KYYlpN-KGrYMVcAOT22NkVQC0UAfycMD -O ./normals.ckpt
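The two gdown calls at the end fetch the pretrained UV and normal prediction checkpoints into pretrained_weights/; scripts/network_inference.py below loads them through env_paths. A minimal sketch of that loading step, assuming the environment paths resolve to the downloaded files:

from pixel3dmm import env_paths
from pixel3dmm.lightning.p3dmm_system import system as p3dmm_system

# Load the UV predictor downloaded above; use env_paths.CKPT_N_PRED for the normals network instead.
model = p3dmm_system.load_from_checkpoint(env_paths.CKPT_UV_PRED, strict=False).cuda()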
media/banner.gif ADDED

Git LFS Details

  • SHA256: 7a8efa82d3b64240743c3b5870f04bce8def66e8ee2021d315dfa649f6837ae2
  • Pointer size: 132 Bytes
  • Size of remote file: 3.13 MB
pyproject.toml ADDED
@@ -0,0 +1,35 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pixel3dmm" # DON'T FORGET TO REMOVE empty FROM git remote!!!
7
+ version = "0.0.1"
8
+ description = "<<<ENTER_DESCRIPTION>>>"
9
+ authors = [
10
+ { name = "Simon Giebenhain", email = "[email protected]" },
11
+ ]
12
+ readme = "README.md"
13
+ license = { text = "CC BY-NC 4.0" }
14
+ requires-python = ">=3.9.0"
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Programming Language :: Python :: 3",
18
+ "Operating System :: OS Independent"
19
+ ]
20
+ # urls = { Documentation = "<<<ENTER_LINK_TO_DOCUMENTATION>>>" }
21
+ # Main dependencies
22
+ dependencies = [
23
+ ]
24
+
25
+ [project.optional-dependencies]
26
+ # Development packages, install via <<<PROJECT_NAME>>>[dev]
27
+ dev = [
28
+ ]
29
+
30
+ [project.scripts]
31
+ # E.g., ns-download-data = "scripts.downloads.download_data:entrypoint"
32
+
33
+ [tool.setuptools.packages.find]
34
+ where = ["src"]
35
+ include = ["pixel3dmm*"] # Keep the '*', otherwise submodules are not found
requirements.txt ADDED
@@ -0,0 +1,26 @@
1
+ numpy==1.23
2
+ omegaconf
3
+ opencv-python
4
+ tensorboard
5
+ wandb
6
+ scikit-image
7
+ pyvista
8
+ dreifus
9
+ chumpy
10
+ h5py
11
+ pytorch_lightning
12
+ einops
13
+ mediapy
14
+ face-alignment==1.3.3
15
+ ninja
16
+
17
+ insightface
18
+ onnxruntime
19
+ loguru
20
+ yacs
21
+
22
+ distinctipy
23
+ validators
24
+ timm
25
+ tyro
26
+ environs
scripts/.gitkeep ADDED
File without changes
scripts/network_inference.py ADDED
@@ -0,0 +1,229 @@
1
+ import traceback
2
+
3
+ from tqdm import tqdm
4
+ import os
5
+ import torch
6
+ import numpy as np
7
+ from PIL import Image
8
+ from omegaconf import OmegaConf
9
+ from time import time
10
+
11
+ from pixel3dmm.utils.uv import uv_pred_to_mesh
12
+ from pixel3dmm.lightning.p3dmm_system import system as p3dmm_system
13
+ #from pixel3dmm.lightning.system_flame_params_legacy import system as system_flame_params_legacy
14
+ from pixel3dmm import env_paths
15
+
16
+
17
+
18
+ def pad_to_3_channels(img):
19
+ if img.shape[-1] == 3:
20
+ return img
21
+ elif img.shape[-1] == 1:
22
+ return np.concatenate([img, np.zeros_like(img[..., :1]), np.zeros_like(img[..., :1])], axis=-1)
23
+ elif img.shape[-1] == 2:
24
+ return np.concatenate([img, np.zeros_like(img[..., :1])], axis=-1)
25
+ else:
26
+ raise ValueError('too many dimensions in prediction type!')
27
+
28
+ def gaussian_fn(M, std):
29
+ n = torch.arange(0, M) - (M - 1.0) / 2.0
30
+ sig2 = 2 * std * std
31
+ w = torch.exp(-n ** 2 / sig2)
32
+ return w
33
+
34
+ def gkern(kernlen=256, std=128):
35
+ """Returns a 2D Gaussian kernel array."""
36
+ gkern1d_x = gaussian_fn(kernlen, std=std * 5)
37
+ gkern1d_y = gaussian_fn(kernlen, std=std)
38
+ gkern2d = torch.outer(gkern1d_y, gkern1d_x)
39
+ return gkern2d
40
+
41
+
42
+ valid_verts = np.load(f'{env_paths.VALID_VERTICES_WIDE_REGION}')
43
+
44
+ def main(cfg):
45
+
46
+ if cfg.model.prediction_type == 'flame_params':
47
+ cfg.data.mirror_aug = False
48
+
49
+ # data loader
50
+ if cfg.model.feature_map_type == 'DINO':
51
+ feature_map_size = 32
52
+ elif cfg.model.feature_map_type == 'sapiens':
53
+ feature_map_size = 64
54
+
55
+ batch_size = 1 #cfg.inference_batch_size
56
+
57
+ checkpoints = {
58
+ 'uv_map': f"{env_paths.CKPT_UV_PRED}",
59
+ 'normals': f"{env_paths.CKPT_N_PRED}",
60
+ }
61
+
62
+
63
+ model_checkpoint = checkpoints[cfg.model.prediction_type]
64
+
65
+ model = None
66
+
67
+
68
+ prediction_types = cfg.model.prediction_type.split(',')
69
+
70
+
71
+ conv = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=11, bias=False, padding='same')
72
+ g_weights = gkern(11, 2)
73
+ g_weights /= torch.sum(g_weights)
74
+ conv.weight = torch.nn.Parameter(g_weights.unsqueeze(0).unsqueeze(0))
75
+
76
+ OUT_NAMES = str(cfg.video_name).split(',')
77
+
78
+ print(f'''
79
+ <<<<<<<< STARTING PIXEL3DMM INFERENCE for {cfg.video_name} in {prediction_types} MODE >>>>>>>>
80
+ ''')
81
+
82
+ for OUT_NAME in OUT_NAMES:
83
+ folder = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/'
84
+ IMAGE_FOLDER = f'{folder}/cropped'
85
+ SEGMENTATION_FOLDER = f'{folder}/seg_og/'
86
+
87
+ out_folders = {}
88
+ out_folders_wGT = {}
89
+ out_folders_viz = {}
90
+
91
+ for prediction_type in prediction_types:
92
+ out_folders[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/p3dmm/{prediction_type}/'
93
+ out_folders_wGT[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/p3dmm_wGT/{prediction_type}/'
94
+ os.makedirs(out_folders[prediction_type], exist_ok=True)
95
+ os.makedirs(out_folders_wGT[prediction_type], exist_ok=True)
96
+ out_folders_viz[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/p3dmm_extraViz/{prediction_type}/'
97
+ os.makedirs(out_folders_viz[prediction_type], exist_ok=True)
98
+
99
+
100
+ image_names = os.listdir(f'{IMAGE_FOLDER}')
101
+ image_names.sort()
102
+
103
+ if os.path.exists(out_folders[prediction_type]):
104
+ if len(os.listdir(out_folders[prediction_type])) == len(image_names):
105
+ return
106
+
107
+ if model is None:
108
+ model = p3dmm_system.load_from_checkpoint(model_checkpoint, strict=False)
109
+ # TODO: disable randomness, dropout, etc...
110
+ # model.eval()
111
+ model = model.cuda()
112
+
113
+
114
+
115
+ for i in tqdm(range(len(image_names))):
116
+ #if not int(image_names[i].split('_')[0]) in [17, 175, 226, 279]:
117
+ # continue
118
+ try:
119
+
120
+ for i_batch in range(batch_size):
121
+ img = np.array(Image.open(f'{IMAGE_FOLDER}/{image_names[i]}').resize((512, 512))) / 255 # need 512,512 images as input; normalize to [0, 1] range
122
+ img = torch.from_numpy(img)[None, None].float().cuda() # 1,1,512,512,3
123
+ img_seg = np.array(Image.open(f'{SEGMENTATION_FOLDER}/{image_names[i][:-4]}.png').resize((512, 512), Image.NEAREST))
124
+ if len(img_seg.shape) == 3:
125
+ img_seg = img_seg[..., 0]
126
+ #img_seg = np.array(Image.open(f'{SEGEMNTATION_FOLDER}/{int(image_names[i][:-4])*3:05d}.png').resize((512, 512), Image.NEAREST))
127
+ mask = ((img_seg == 2) | ((img_seg > 3) & (img_seg < 14)) ) & ~(img_seg==11)
128
+ mask = torch.from_numpy(mask).long().cuda()[None, None] # 1, 1, 512, 512
129
+ #mask = torch.ones_like(img[..., 0]).cuda().bool()
130
+ batch = {
131
+ 'tar_msk': mask,
132
+ 'tar_rgb': img,
133
+ }
134
+ batch_mirrored = {
135
+ 'tar_rgb': torch.flip(batch['tar_rgb'], dims=[3]).cuda(),
136
+ 'tar_msk': torch.flip(batch['tar_msk'], dims=[3]).cuda(),
137
+ }
138
+
139
+
140
+ # execute model twice, once with original image, once with mirrored original image,
141
+ # and then average results after undoing the mirroring operation on the prediction
142
+ with torch.no_grad():
143
+ output, conf = model.net(batch)
144
+ output_mirrored, conf = model.net(batch_mirrored)
145
+
146
+ if 'uv_map' in output:
147
+ fliped_uv_pred = torch.flip(output_mirrored['uv_map'], dims=[4])
148
+ fliped_uv_pred[:, :, 0, :, :] *= -1
149
+ fliped_uv_pred[:, :, 0, :, :] += 2*0.0075
150
+ output['uv_map'] = (output['uv_map'] + fliped_uv_pred)/2
151
+ if 'normals' in output:
152
+ fliped_uv_pred = torch.flip(output_mirrored['normals'], dims=[4])
153
+ fliped_uv_pred[:, :, 0, :, :] *= -1
154
+ output['normals'] = (output['normals'] + fliped_uv_pred)/2
155
+ if 'disps' in output:
156
+ fliped_uv_pred = torch.flip(output_mirrored['disps'], dims=[4])
157
+ fliped_uv_pred[:, :, 0, :, :] *= -1
158
+ output['disps'] = (output['disps'] + fliped_uv_pred)/2
159
+
160
+
161
+
162
+ for prediction_type in prediction_types:
163
+ for i_batch in range(batch_size):
164
+
165
+ i_view = 0
166
+ gt_rgb = batch['tar_rgb']
167
+
168
+ # normalize to [0,1] range
169
+ if prediction_type == 'uv_map':
170
+ tmp_output = torch.clamp((output[prediction_type][i_batch, i_view] + 1) / 2, 0, 1)
171
+ elif prediction_type == 'disps':
172
+ tmp_output = torch.clamp((output[prediction_type][i_batch, i_view] + 50) / 100, 0, 1)
173
+ elif prediction_type in ['normals', 'normals_can']:
174
+ tmp_output = output[prediction_type][i_batch, i_view]
175
+ tmp_output = tmp_output / torch.norm(tmp_output, dim=0).unsqueeze(0)
176
+ tmp_output = torch.clamp((tmp_output + 1) / 2, 0, 1)
177
+ # undo "weird" convention of normals that I used for preprocessing
178
+ tmp_output = torch.stack(
179
+ [tmp_output[0, ...], 1 - tmp_output[2, ...], 1 - tmp_output[1, ...]],
180
+ dim=0)
181
+
182
+
183
+ content = [
184
+ gt_rgb[i_batch, i_view].detach().cpu().numpy(),
185
+ pad_to_3_channels(tmp_output.permute(1, 2, 0).detach().cpu().float().numpy()),
186
+ ]
187
+
188
+ catted = (np.concatenate(content, axis=1) * 255).astype(np.uint8)
189
+ Image.fromarray(catted).save(f'{out_folders_wGT[prediction_type]}/{image_names[i]}')
190
+
191
+
192
+ Image.fromarray(
193
+ pad_to_3_channels(
194
+ tmp_output.permute(1, 2, 0).detach().cpu().float().numpy() * 255).astype(
195
+ np.uint8)).save(
196
+ f'{out_folders[prediction_type]}/{image_names[i][:-4]}.png')
197
+
198
+
199
+ # this visualization is quite slow, therefore it is disabled by default
200
+ if prediction_type == 'uv_map' and cfg.viz_uv_mesh:
201
+ to_show_non_mirr = uv_pred_to_mesh(
202
+ output[prediction_type][i_batch:i_batch + 1, ...],
203
+ batch['tar_msk'][i_batch:i_batch + 1, ...],
204
+ batch['tar_rgb'][i_batch:i_batch + 1, ...],
205
+ right_ear = [537, 1334, 857, 554, 941],
206
+ left_ear = [541, 476, 237, 502, 286],
207
+ )
208
+
209
+ Image.fromarray(to_show_non_mirr).save(f'{out_folders_viz[prediction_type]}/{image_names[i]}')
210
+
211
+ except Exception as exx:
212
+ traceback.print_exc()
213
+ pass
214
+
215
+ print(f'''
216
+ <<<<<<<< FINISHED PIXEL3DMM INFERENCE for {cfg.video_name} in {prediction_types} MODE >>>>>>>>
217
+ ''')
218
+
219
+
220
+
221
+
222
+
223
+ if __name__ == '__main__':
224
+ base_conf = OmegaConf.load(f'{env_paths.CODE_BASE}/configs/base.yaml')
225
+
226
+ cli_conf = OmegaConf.from_cli()
227
+ cfg = OmegaConf.merge(base_conf, cli_conf)
228
+
229
+ main(cfg)
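The inference loop above is a flip-averaging scheme: the network runs on the frame and on its horizontal mirror, the mirrored prediction is flipped back with its x channel negated (for UV and normal maps), and the two results are averaged. A minimal numpy sketch of the idea for a normal map; `predict` is a placeholder for the network call, not an API of this repository:

import numpy as np

def flip_average_normals(img, predict):
    # predict(img) is assumed to return an (H, W, 3) normal map aligned with its input.
    normals = predict(img)
    mirrored = predict(img[:, ::-1])     # run on the horizontally mirrored image
    mirrored = mirrored[:, ::-1].copy()  # undo the mirroring spatially
    mirrored[..., 0] *= -1               # the x component changes sign under a horizontal flip
    return 0.5 * (normals + mirrored)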
scripts/run_cropping.py ADDED
@@ -0,0 +1,107 @@
1
+ import traceback
2
+ import os
3
+ import sys
4
+ import importlib
5
+
6
+ import mediapy
7
+ from PIL import Image
8
+ import tyro
9
+
10
+ import torchvision.transforms as transforms
11
+
12
+
13
+ from pixel3dmm import env_paths
14
+ sys.path.append(f'{env_paths.CODE_BASE}/src/pixel3dmm/preprocessing/PIPNet/FaceBoxesV2/')
15
+ from pixel3dmm.preprocessing.pipnet_utils import demo_image
16
+ from pixel3dmm import env_paths
17
+
18
+
19
+
20
+
21
+ def run(exp_path, image_dir, start_frame = 0,
22
+ vertical_crop : bool = False,
23
+ static_crop : bool = False,
24
+ max_bbox : bool = False,
25
+ disable_cropping : bool = False,
26
+ ):
27
+ experiment_name = exp_path.split('/')[-1][:-3]
28
+ data_name = exp_path.split('/')[-2]
29
+ config_path = '.experiments.{}.{}'.format(data_name, experiment_name)
30
+
31
+ my_config = importlib.import_module(config_path, package='pixel3dmm.preprocessing.PIPNet')
32
+ Config = getattr(my_config, 'Config')
33
+ cfg = Config()
34
+ cfg.experiment_name = experiment_name
35
+ cfg.data_name = data_name
36
+
37
+ save_dir = os.path.join(f'{env_paths.CODE_BASE}/src/pixel3dmm/preprocessing/PIPNet/snapshots', cfg.data_name, cfg.experiment_name)
38
+
39
+
40
+
41
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
42
+ std=[0.229, 0.224, 0.225])
43
+ preprocess = transforms.Compose(
44
+ [transforms.Resize((cfg.input_size, cfg.input_size)), transforms.ToTensor(), normalize])
45
+
46
+
47
+ #for pid in pids:
48
+ pid = "FaMoS_180424_03335_TA_selfie_IMG_0092.jpg"
49
+ pid = "FaMoS_180426_03336_TA_selfie_IMG_0152.jpg"
50
+
51
+
52
+
53
+ demo_image(image_dir, pid, save_dir, preprocess, cfg, cfg.input_size, cfg.net_stride, cfg.num_nb,
54
+ cfg.use_gpu,
55
+ start_frame=start_frame, vertical_crop=vertical_crop, static_crop=static_crop, max_bbox=max_bbox,
56
+ disable_cropping=disable_cropping)
57
+
58
+
59
+ def unpack_images(base_path, video_or_images_path):
60
+ if not os.path.exists(base_path):
61
+ os.makedirs(base_path, exist_ok=True)
62
+ if os.path.isdir(video_or_images_path):
63
+ files = os.listdir(f'{video_or_images_path}')
64
+ files.sort()
65
+ if len(os.listdir(base_path)) == len(files):
66
+ print(f'''
67
+ <<<<<<<< ALREADY COMPLETED IMAGE CROPPING for {video_or_images_path}, SKIPPING! >>>>>>>>
68
+ ''')
69
+ return
70
+ for i, file in enumerate(files):
71
+ I = Image.open(f'{video_or_images_path}/{file}')
72
+ I.save(f'{base_path}/{i:05d}.jpg', quality=95)
73
+ elif video_or_images_path.endswith('.jpg') or video_or_images_path.endswith('.jpeg') or video_or_images_path.endswith('.png'):
74
+ Image.open(video_or_images_path).save(f'{base_path}/{0:05d}.jpg', quality=95)
75
+ else:
76
+ frames = mediapy.read_video(f'{video_or_images_path}')
77
+ if len(frames) == len(os.listdir(base_path)):
78
+ return
79
+ for i, frame in enumerate(frames):
80
+ Image.fromarray(frame).save(f'{base_path}/{i:05d}.jpg', quality=95)
81
+
82
+ def main(video_or_images_path : str,
83
+ max_bbox : bool = True, # not used
84
+ disable_cropping : bool = False):
85
+ if os.path.isdir(video_or_images_path):
86
+ video_name = video_or_images_path.split('/')[-1]
87
+ else:
88
+ video_name = video_or_images_path.split('/')[-1][:-4]
89
+
90
+ base_path = f'{env_paths.PREPROCESSED_DATA}/{video_name}/rgb/'
91
+
92
+ unpack_images(base_path, video_or_images_path)
93
+
94
+ if os.path.exists(f'{env_paths.PREPROCESSED_DATA}/{video_name}/cropped/'):
95
+ if len(os.listdir(base_path)) == len(os.listdir(f'{env_paths.PREPROCESSED_DATA}/{video_name}/cropped/')):
96
+ return
97
+
98
+
99
+ start_frame = -1
100
+ run('experiments/WFLW/pip_32_16_60_r18_l2_l1_10_1_nb10.py', base_path, start_frame=start_frame, vertical_crop=False,
101
+ static_crop=True, max_bbox=max_bbox, disable_cropping=disable_cropping)
102
+ # run('experiments/WFLW/pip_32_16_60_r101_l2_l1_10_1_nb10.py', base_path, start_frame=start_frame, vertical_crop=False, static_crop=True)
103
+
104
+
105
+ if __name__ == '__main__':
106
+ tyro.cli(main)
107
+
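run_cropping.py exposes its main() through tyro, which builds a command-line interface from the function signature and type hints. A minimal, self-contained sketch of that pattern; the function below is a hypothetical stand-in, not part of the repository:

import tyro

def crop_demo(video_or_images_path: str, disable_cropping: bool = False) -> None:
    # tyro parses the CLI and calls this function with typed arguments.
    print(video_or_images_path, disable_cropping)

if __name__ == "__main__":
    tyro.cli(crop_demo)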
scripts/run_facer_segmentation.py ADDED
@@ -0,0 +1,221 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ from math import ceil
6
+
7
+ import PIL.Image
8
+ import torch
9
+ import distinctipy
10
+ import matplotlib.pyplot as plt
11
+ from PIL import Image
12
+ import numpy as np
13
+ import facer
14
+ import tyro
15
+
16
+ from pixel3dmm import env_paths
17
+
18
+ colors = distinctipy.get_colors(22, rng=0)
19
+
20
+
21
+ def viz_results(img, seq_classes, n_classes, suppress_plot = False):
22
+
23
+ seg_img = np.zeros([img.shape[-2], img.shape[-1], 3])
24
+ #distinctipy.color_swatch(colors)
25
+ bad_indices = [
26
+ 0, # background,
27
+ 1, # neck
28
+ # 2, skin
29
+ 3, # cloth
30
+ 4, # ear_r (images-space r)
31
+ 5, # ear_l
32
+ # 6 brow_r
33
+ # 7 brow_l
34
+ # 8, # eye_r
35
+ # 9, # eye_l
36
+ # 10 noise
37
+ # 11 mouth
38
+ # 12 lower_lip
39
+ # 13 upper_lip
40
+ 14, # hair,
41
+ # 15, glasses
42
+ 16, # ??
43
+ 17, # earring_r
44
+ 18, # ?
45
+ ]
46
+ bad_indices = []
47
+
48
+ for i in range(n_classes):
49
+ if i not in bad_indices:
50
+ seg_img[seq_classes[0, :, :] == i] = np.array(colors[i])*255
51
+
52
+ if not suppress_plot:
53
+ plt.imshow(seg_img.astype(np.uint8))
54
+ plt.show()
55
+ return Image.fromarray(seg_img.astype(np.uint8))
56
+
57
+ def get_color_seg(img, seq_classes, n_classes):
58
+
59
+ seg_img = np.zeros([img.shape[-2], img.shape[-1], 3])
60
+ colors = distinctipy.get_colors(n_classes+1, rng=0)
61
+ #distinctipy.color_swatch(colors)
62
+ bad_indices = [
63
+ 0, # background,
64
+ 1, # neck
65
+ # 2, skin
66
+ 3, # cloth
67
+ 4, # ear_r (images-space r)
68
+ 5, # ear_l
69
+ # 6 brow_r
70
+ # 7 brow_l
71
+ # 8, # eye_r
72
+ # 9, # eye_l
73
+ # 10 nose
74
+ # 11 mouth
75
+ # 12 lower_lip
76
+ # 13 upper_lip
77
+ 14, # hair,
78
+ # 15, glasses
79
+ 16, # ??
80
+ 17, # earring_r
81
+ 18, # ?
82
+ ]
83
+
84
+ for i in range(n_classes):
85
+ if i not in bad_indices:
86
+ seg_img[seq_classes[0, :, :] == i] = np.array(colors[i])*255
87
+
88
+
89
+ return Image.fromarray(seg_img.astype(np.uint8))
90
+
91
+
92
+ def crop_gt_img(img, seq_classes, n_classes):
93
+
94
+ seg_img = np.zeros([img.shape[-2], img.shape[-1], 3])
95
+ colors = distinctipy.get_colors(n_classes+1, rng=0)
96
+ #distinctipy.color_swatch(colors)
97
+ bad_indices = [
98
+ 0, # background,
99
+ 1, # neck
100
+ # 2, skin
101
+ 3, # cloth
102
+ 4, #ear_r (images-space r)
103
+ 5, #ear_l
104
+ # 6 brow_r
105
+ # 7 brow_l
106
+ #8, # eye_r
107
+ #9, # eye_l
108
+ # 10 noise
109
+ # 11 mouth
110
+ # 12 lower_lip
111
+ # 13 upper_lip
112
+ 14, # hair,
113
+ # 15, glasses
114
+ 16, # ??
115
+ 17, # earring_r
116
+ 18, # ?
117
+ ]
118
+
119
+ for i in range(n_classes):
120
+ if i in bad_indices:
121
+ img[seq_classes[0, :, :] == i] = 0
122
+
123
+
124
+ #plt.imshow(img.astype(np.uint8))
125
+ #plt.show()
126
+ return img.astype(np.uint8)
127
+
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+
131
+
132
+
133
+ face_detector = facer.face_detector('retinaface/mobilenet', device=device)
134
+ face_parser = facer.face_parser('farl/celebm/448', device=device) # optional "farl/lapa/448"
135
+
136
+
137
+ def main(video_name : str):
138
+
139
+
140
+ out = f'{env_paths.PREPROCESSED_DATA}/{video_name}'
141
+ out_seg = f'{out}/seg_og/'
142
+ out_seg_annot = f'{out}/seg_non_crop_annotations/'
143
+ os.makedirs(out_seg, exist_ok=True)
144
+ os.makedirs(out_seg_annot, exist_ok=True)
145
+ folder = f'{out}/cropped/' # '/home/giebenhain/GTA/data_kinect/color/'
146
+
147
+
148
+
149
+
150
+
151
+ frames = [f for f in os.listdir(folder) if f.endswith('.png') or f.endswith('.jpg')]
152
+
153
+ frames.sort()
154
+
155
+ if len(os.listdir(out_seg)) == len(frames):
156
+ print(f'''
157
+ <<<<<<<< ALREADY COMPLETED SEGMENTATION FOR {video_name}, SKIPPING >>>>>>>>
158
+ ''')
159
+ return
160
+
161
+ #for file in frames:
162
+ batch_size = 1
163
+
164
+ for i in range(len(frames)//batch_size):
165
+ image_stack = []
166
+ frame_stack = []
167
+ original_shapes = []
168
+ for j in range(batch_size):
169
+ file = frames[i * batch_size + j]
170
+
171
+ if os.path.exists(f'{out_seg_annot}/color_{file}.png'):
172
+ print('DONE')
173
+ continue
174
+ img = Image.open(f'{folder}/{file}')#.resize((512, 512))
175
+
176
+ og_size = img.size
177
+
178
+ image = facer.hwc2bchw(torch.from_numpy(np.array(img)[..., :3])).to(device=device) # image: 1 x 3 x h x w
179
+ image_stack.append(image)
180
+ frame_stack.append(file[:-4])
181
+
182
+ for batch_idx in range(ceil(len(image_stack)/batch_size)):
183
+ image_batch = torch.cat(image_stack[batch_idx*batch_size:(batch_idx+1)*batch_size], dim=0)
184
+ frame_idx_batch = frame_stack[batch_idx*batch_size:(batch_idx+1)*batch_size]
185
+ og_shape_batch = original_shapes[batch_idx*batch_size:(batch_idx+1)*batch_size]
186
+
187
+ #if True:
188
+ try:
189
+ with torch.inference_mode():
190
+ faces = face_detector(image_batch)
191
+ torch.cuda.empty_cache()
192
+ faces = face_parser(image_batch, faces, bbox_scale_factor=1.25)
193
+ torch.cuda.empty_cache()
194
+
195
+ seg_logits = faces['seg']['logits']
196
+ back_ground = torch.all(seg_logits == 0, dim=1, keepdim=True).detach().squeeze(1).cpu().numpy()
197
+ seg_probs = seg_logits.softmax(dim=1) # nfaces x nclasses x h x w
198
+ seg_classes = seg_probs.argmax(dim=1).detach().cpu().numpy().astype(np.uint8)
199
+ seg_classes[back_ground] = seg_probs.shape[1] + 1
200
+
201
+
202
+ for _iidx in range(seg_probs.shape[0]):
203
+ frame = frame_idx_batch[_iidx]
204
+ iidx = faces['image_ids'][_iidx].item()
205
+ try:
206
+ I_color = viz_results(image_batch[iidx:iidx+1], seq_classes=seg_classes[_iidx:_iidx+1], n_classes=seg_probs.shape[1] + 1, suppress_plot=True)
207
+ I_color.save(f'{out_seg_annot}/color_{frame}.png')
208
+ except Exception as ex:
209
+ pass
210
+ I = Image.fromarray(seg_classes[_iidx])
211
+ I.save(f'{out_seg}/{frame}.png')
212
+ torch.cuda.empty_cache()
213
+ except Exception as exx:
214
+ traceback.print_exc()
215
+ continue
216
+
217
+
218
+ if __name__ == '__main__':
219
+
220
+ tyro.cli(main)
221
+
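The script above writes one label map per frame to `seg_og/` (per-pixel class indices from the `farl/celebm/448` parser). Below is a sketch of turning such a map into a binary face mask; the excluded class indices follow the commented list above and should be treated as an assumption, not a guaranteed label layout.

```python
import numpy as np
from PIL import Image

# Indices assumed from the commented CelebAMask-style layout above:
# 0 background, 1 neck, 3 cloth, 14 hair, 16/17/18 accessories.
EXCLUDE = (0, 1, 3, 14, 16, 17, 18)

def face_mask_from_seg(seg_png_path: str) -> np.ndarray:
    """Load a seg_og/*.png label map and return a boolean face-region mask."""
    labels = np.array(Image.open(seg_png_path))
    return ~np.isin(labels, EXCLUDE)

# placeholder path:
# mask = face_mask_from_seg('<PREPROCESSED_DATA>/<video_name>/seg_og/00000.png')
```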
scripts/run_preprocessing.py ADDED
@@ -0,0 +1,23 @@
1
+ import os
2
+ import tyro
3
+
4
+ from pixel3dmm import env_paths
5
+
6
+
7
+ def main(video_or_images_path : str):
8
+
9
+ if os.path.isdir(video_or_images_path):
10
+ vid_name = video_or_images_path.split('/')[-1]
11
+ else:
12
+ vid_name = video_or_images_path.split('/')[-1][:-4]
13
+
14
+ os.system(f'cd {env_paths.CODE_BASE}/scripts/ ; python run_cropping.py --video_or_images_path {video_or_images_path}')
15
+
16
+ os.system(f'cd {env_paths.CODE_BASE}/src/pixel3dmm/preprocessing/MICA ; python demo.py -video_name {vid_name}')
17
+
18
+ os.system(f'cd {env_paths.CODE_BASE}/scripts/ ; python run_facer_segmentation.py --video_name {vid_name}')
19
+
20
+
21
+
22
+ if __name__ == '__main__':
23
+ tyro.cli(main)
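run_preprocessing.py chains the three stages (cropping, MICA identity prediction, facer segmentation) with `os.system`. As a sketch only, the same chain could be run with `subprocess.run(check=True)` so a failing stage stops the pipeline instead of being silently ignored; the script locations are taken from the calls above, and the example video name is a placeholder.

```python
import subprocess
from pixel3dmm import env_paths

def run_stage(workdir: str, *cmd: str) -> None:
    # check=True raises CalledProcessError if the stage exits non-zero.
    subprocess.run(list(cmd), cwd=workdir, check=True)

vid_path = f'{env_paths.CODE_BASE}/example_videos/ex1.mp4'  # placeholder input
vid_name = 'ex1'

run_stage(f'{env_paths.CODE_BASE}/scripts',
          'python', 'run_cropping.py', '--video_or_images_path', vid_path)
run_stage(f'{env_paths.CODE_BASE}/src/pixel3dmm/preprocessing/MICA',
          'python', 'demo.py', '-video_name', vid_name)
run_stage(f'{env_paths.CODE_BASE}/scripts',
          'python', 'run_facer_segmentation.py', '--video_name', vid_name)
```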
scripts/track.py ADDED
@@ -0,0 +1,27 @@
1
+ import os
2
+ import wandb
3
+
4
+ from omegaconf import OmegaConf
5
+ from pixel3dmm.tracking.tracker import Tracker
6
+ from pixel3dmm import env_paths
7
+
8
+ def main(cfg):
9
+ tracker = Tracker(cfg)
10
+ tracker.run()
11
+
12
+ if __name__ == '__main__':
13
+ base_conf = OmegaConf.load(f'{env_paths.CODE_BASE}/configs/tracking.yaml')
14
+
15
+ cli_conf = OmegaConf.from_cli()
16
+ cfg = OmegaConf.merge(base_conf, cli_conf)
17
+
18
+ #os.makedirs('/home/giebenhain/debug_wandb_p3dmm/', exist_ok=True)
19
+ #wandb.init(
20
+ # dir='/home/giebenhain/debug_wandb_p3dmm/',
21
+ # #config=config,
22
+ # project='face-tracking-p3dmm',
23
+ # #tags=wandb_tags,
24
+ # #name=cfg.config_name,
25
+ #
26
+ #)
27
+ main(cfg)
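track.py builds its config by merging `configs/tracking.yaml` with `OmegaConf.from_cli()`, so any dotted key on the command line overrides the YAML. A small self-contained check of that precedence; the keys here are illustrative, not the real tracking schema.

```python
from omegaconf import OmegaConf

base = OmegaConf.create({'video_name': 'ex1', 'iters': 150})   # stands in for tracking.yaml
cli = OmegaConf.from_dotlist(['video_name=ex2', 'iters=300'])  # stands in for CLI overrides
cfg = OmegaConf.merge(base, cli)                               # later configs win

assert cfg.video_name == 'ex2' and cfg.iters == 300
```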
scripts/viz_head_centric_cameras.py ADDED
@@ -0,0 +1,103 @@
1
+ import os
2
+ import tyro
3
+ import mediapy
4
+ import torch
5
+ import numpy as np
6
+ import pyvista as pv
7
+ import trimesh
8
+ from PIL import Image
9
+
10
+ from dreifus.matrix import Intrinsics, Pose, CameraCoordinateConvention, PoseType
11
+ from dreifus.pyvista import add_camera_frustum, render_from_camera
12
+
13
+ from pixel3dmm.utils.utils_3d import rotation_6d_to_matrix
14
+ from pixel3dmm.env_paths import PREPROCESSED_DATA, TRACKING_OUTPUT
15
+
16
+
17
+ def main(vid_name : str,
18
+ HEAD_CENTRIC : bool = True,
19
+ DO_PROJECTION_TEST : bool = False,
20
+ ):
21
+ tracking_dir = f'{TRACKING_OUTPUT}/{vid_name}_nV1_noPho_uv2000.0_n1000.0'
22
+
23
+ meshes = [f for f in os.listdir(f'{tracking_dir}/mesh/') if f.endswith('.ply') and not 'canonical' in f]
24
+ meshes.sort()
25
+
26
+ ckpts = [f for f in os.listdir(f'{tracking_dir}/checkpoint/') if f.endswith('.frame')]
27
+ ckpts.sort()
28
+
29
+ N_STEPS = len(meshes)
30
+
31
+ pl = pv.Plotter()
32
+ vid_frames = []
33
+ for i in range(N_STEPS):
34
+ ckpt = torch.load(f'{tracking_dir}/checkpoint/{ckpts[i]}', weights_only=False)
35
+
36
+ mesh = trimesh.load(f'{tracking_dir}/mesh/{meshes[i]}', process=False)
37
+
38
+ head_rot = rotation_6d_to_matrix(torch.from_numpy(ckpt['flame']['R'])).numpy()[0]
39
+
40
+ if not HEAD_CENTRIC:
41
+ # move mesh from FLAME Space into World Space
42
+ mesh.vertices = mesh.vertices @ head_rot.T + (ckpt['flame']['t'])
43
+ else:
44
+ # undo neck rotation
45
+ verts_hom = np.concatenate([mesh.vertices, np.ones_like(mesh.vertices[..., :1])], axis=-1)
46
+ verts_hom = verts_hom @ np.linalg.inv(ckpt['joint_transforms'][0, 1, :, :]).T
47
+ mesh.vertices = verts_hom[..., :3]
48
+
49
+
50
+
51
+ extr_open_gl_world_to_cam = np.eye(4)
52
+ extr_open_gl_world_to_cam[:3, :3] = ckpt['camera']['R_base_0'][0]
53
+ extr_open_gl_world_to_cam[:3, 3] = ckpt['camera']['t_base_0'][0]
54
+ if HEAD_CENTRIC:
55
+ flame2world = np.eye(4)
56
+ flame2world[:3, :3] = head_rot
57
+ flame2world[:3, 3] = np.squeeze(ckpt['flame']['t'])
58
+ #TODO include neck transform as well
59
+ extr_open_gl_world_to_cam = extr_open_gl_world_to_cam @ flame2world @ ckpt['joint_transforms'][0, 1, :, :]
60
+
61
+
62
+
63
+
64
+ extr_open_gl_world_to_cam = Pose(extr_open_gl_world_to_cam,
65
+ camera_coordinate_convention=CameraCoordinateConvention.OPEN_GL,
66
+ pose_type=PoseType.WORLD_2_CAM)
67
+
68
+ intr = np.eye(3)
69
+ intr[0, 0] = ckpt['camera']['fl'][0, 0] * 256
70
+ intr[1, 1] = ckpt['camera']['fl'][0, 0] * 256
71
+ intr[:2, 2] = ckpt['camera']['pp'][0] * (256/2+0.5) + 256/2 + 0.5
72
+
73
+ intr = Intrinsics(intr)
74
+
75
+
76
+
77
+ pl.add_mesh(mesh, color=[(i/N_STEPS), 0, ((N_STEPS-i)/N_STEPS)])
78
+ add_camera_frustum(pl, extr_open_gl_world_to_cam, intr, color=[(i/N_STEPS), 0, ((N_STEPS-i)/N_STEPS)])
79
+
80
+ if DO_PROJECTION_TEST:
81
+ pll = pv.Plotter(off_screen=True, window_size=(256, 256))
82
+ pll.add_mesh(mesh)
83
+ img = render_from_camera(pll, extr_open_gl_world_to_cam, intr)
84
+
85
+ gt_img = np.array(Image.open(f'{PREPROCESSED_DATA}/{vid_name}/cropped/{i:05d}.jpg').resize((256, 256)))
86
+
87
+ alpha = img[..., 3]
88
+
89
+ overlay = (gt_img *0.5 + img[..., :3]*0.5).astype(np.uint8)
90
+ vid_frames.append(overlay)
91
+
92
+
93
+
94
+
95
+ pl.show()
96
+
97
+ if DO_PROJECTION_TEST:
98
+ mediapy.write_video(f'{tracking_dir}/projection_test.mp4', images=vid_frames)
99
+
100
+
101
+
102
+ if __name__ == '__main__':
103
+ tyro.cli(main)
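The script above moves FLAME-space vertices into world space with `verts @ R.T + t` and composes the same transform as a 4x4 matrix when adjusting the camera pose. A tiny numpy sketch of why those two forms agree, using random data rather than anything Pixel3DMM-specific:

```python
import numpy as np

rng = np.random.default_rng(0)
V = rng.normal(size=(5, 3))                   # row-vector vertices
R, _ = np.linalg.qr(rng.normal(size=(3, 3)))  # orthonormal matrix (sign not fixed)
t = rng.normal(size=3)

T = np.eye(4)
T[:3, :3], T[:3, 3] = R, t                    # same map as a homogeneous 4x4

V_hom = np.concatenate([V, np.ones((len(V), 1))], axis=-1)
assert np.allclose(V @ R.T + t, (V_hom @ T.T)[:, :3])
```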
setup.py ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env python
2
+
3
+ import setuptools
4
+
5
+ if __name__ == "__main__":
6
+ # Still necessary, otherwise we get a pip error
7
+ setuptools.setup()
src/__init__.py ADDED
File without changes
src/pixel3dmm/__init__.py ADDED
File without changes
src/pixel3dmm/env_paths.py ADDED
@@ -0,0 +1,34 @@
1
+ import json
2
+ from pathlib import Path
3
+ from environs import Env
4
+
5
+
6
+ env = Env(expand_vars=True)
7
+ env_file_path = Path(f"{Path.home()}/.config/pixel3dmm/.env")
8
+ if env_file_path.exists():
9
+ env.read_env(str(env_file_path), recurse=False)
10
+
11
+
12
+ with env.prefixed("PIXEL3DMM_"):
13
+ CODE_BASE = env("CODE_BASE")
14
+ PREPROCESSED_DATA = env("PREPROCESSED_DATA")
15
+ TRACKING_OUTPUT = env("TRACKING_OUTPUT")
16
+
17
+
18
+
19
+ head_template = f'{CODE_BASE}/assets/head_template.obj'
20
+ head_template_color = f'{CODE_BASE}/assets/head_template_color.obj'
21
+ head_template_ply = f'{CODE_BASE}/assets/test_rigid.ply'
22
+ VALID_VERTICES_WIDE_REGION = f'{CODE_BASE}/assets/uv_valid_verty_noEyes_debug.npy'
23
+ VALID_VERTS_UV_MESH = f'{CODE_BASE}/assets/uv_valid_verty.npy'
24
+ VERTEX_WEIGHT_MASK = f'{CODE_BASE}/assets/flame_vertex_weights.npy'
25
+ MIRROR_INDEX = f'{CODE_BASE}/assets/flame_mirror_index.npy'
26
+ EYE_MASK = f'{CODE_BASE}/assets/uv_mask_eyes.png'
27
+ FLAME_UV_COORDS = f'{CODE_BASE}/assets/flame_uv_coords.npy'
28
+ VALID_VERTS_NARROW = f'{CODE_BASE}/assets/uv_valid_verty_noEyes.npy'
29
+ VALID_VERTS = f'{CODE_BASE}/assets/uv_valid_verty_noEyes_noEyeRegion_debug_wEars.npy'
30
+ FLAME_ASSETS = f'{CODE_BASE}/src/pixel3dmm/preprocessing/MICA/data/'
31
+
32
+ # paths to pretrained pixel3dmm checkpoints
33
+ CKPT_UV_PRED = f'{CODE_BASE}/pretrained_weights/uv.ckpt'
34
+ CKPT_N_PRED = f'{CODE_BASE}/pretrained_weights/normals.ckpt'
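env_paths.py reads `PIXEL3DMM_`-prefixed variables from `~/.config/pixel3dmm/.env` via `environs`. A sketch of creating that file programmatically; the three paths are placeholders to replace with your own setup.

```python
from pathlib import Path

env_file = Path.home() / '.config' / 'pixel3dmm' / '.env'
env_file.parent.mkdir(parents=True, exist_ok=True)
env_file.write_text(
    'PIXEL3DMM_CODE_BASE=/path/to/pixel3dmm\n'
    'PIXEL3DMM_PREPROCESSED_DATA=/path/to/preprocessed_data\n'
    'PIXEL3DMM_TRACKING_OUTPUT=/path/to/tracking_output\n'
)
```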
src/pixel3dmm/lightning/p3dmm_network.py ADDED
The diff for this file is too large to render. See raw diff
 
src/pixel3dmm/lightning/p3dmm_system.py ADDED
@@ -0,0 +1,491 @@
1
+ from PIL import Image, ImageDraw
2
+ import os
3
+ import torch
4
+ import numpy as np
5
+ import pytorch_lightning as L
6
+ import torch.nn as nn
7
+
8
+ from pixel3dmm.lightning.utils import CosineWarmupScheduler, WarmupScheduler
9
+ from pixel3dmm.lightning.p3dmm_network import Network
10
+ from pixel3dmm import env_paths
11
+
12
+
13
+ def fov_to_ixt(fov, reso=512):
14
+ ixt = torch.eye(3).float().unsqueeze(0).repeat(fov.shape[0], 1, 1).to(fov.device)
15
+ ixt[:, 0, 2] = reso / 2
16
+ ixt[:, 1, 2] = reso / 2
17
+ focal = .5 * reso / torch.tan(.5 * fov)
18
+ ixt[:, 0, 0] = focal
19
+ ixt[:, 1, 1] = focal
20
+ return ixt
21
+
22
+
23
+ def batch_rodrigues(
24
+ rot_vecs: torch.Tensor,
25
+ epsilon: float = 1e-8,
26
+ ) -> torch.Tensor:
27
+ ''' Calculates the rotation matrices for a batch of rotation vectors
28
+ Parameters
29
+ ----------
30
+ rot_vecs: torch.tensor Nx3
31
+ array of N axis-angle vectors
32
+ Returns
33
+ -------
34
+ R: torch.tensor Nx3x3
35
+ The rotation matrices for the given axis-angle parameters
36
+ '''
37
+
38
+ batch_size = rot_vecs.shape[0]
39
+ device, dtype = rot_vecs.device, rot_vecs.dtype
40
+
41
+ angle = torch.norm(rot_vecs + 1e-8, dim=1, keepdim=True)
42
+ rot_dir = rot_vecs / angle
43
+
44
+ cos = torch.unsqueeze(torch.cos(angle), dim=1)
45
+ sin = torch.unsqueeze(torch.sin(angle), dim=1)
46
+
47
+ # Bx1 arrays
48
+ rx, ry, rz = torch.split(rot_dir, 1, dim=1)
49
+ K = torch.zeros((batch_size, 3, 3), dtype=dtype, device=device)
50
+
51
+ zeros = torch.zeros((batch_size, 1), dtype=dtype, device=device)
52
+ K = torch.cat([zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1) \
53
+ .view((batch_size, 3, 3))
54
+
55
+ ident = torch.eye(3, dtype=dtype, device=device).unsqueeze(dim=0)
56
+ rot_mat = ident + sin * K + (1 - cos) * torch.bmm(K, K)
57
+ return rot_mat
58
+
59
+
60
+ def pad_to_3_channels(img):
61
+ if img.shape[-1] == 3:
62
+ return img
63
+ elif img.shape[-1] == 1:
64
+ return np.concatenate([img, np.zeros_like(img[..., :1]), np.zeros_like(img[..., :1])], axis=-1)
65
+ elif img.shape[-1] == 2:
66
+ return np.concatenate([img, np.zeros_like(img[..., :1])], axis=-1)
67
+ else:
68
+ raise ValueError('too many dimensions in prediction type!')
69
+
70
+
71
+ class system(L.LightningModule):
72
+ def __init__(self, cfg):
73
+ super().__init__()
74
+
75
+ self.glctx = None
76
+ self.cfg = cfg
77
+ self.net = Network(cfg)
78
+
79
+ vertex_weight_mask = np.load(f'{env_paths.VERTEX_WEIGHT_MASK}')
80
+
81
+ self.register_buffer('vertex_weight_mask', torch.from_numpy(vertex_weight_mask).float())
82
+
83
+
84
+
85
+ self.validation_step_outputs = []
86
+ self.validation_step_outputs_per_dataset = []
87
+
88
+ self.dataset_types = [
89
+ 'facescape',
90
+ 'nphm',
91
+ 'ava',
92
+ ]
93
+
94
+
95
+ self.do_eval = True
96
+
97
+ self.alpha = 1.0
98
+
99
+ self.save_hyperparameters()
100
+
101
+ self.loss_weights = {
102
+ 'albedo': 1.0, # 1.0/0.13,
103
+ 'depth': 1.0,
104
+ 'pos_map': 1.0, # 1.0/0.0006,
105
+ 'pos_map_can': 1.0, # 1.0/0.0006,
106
+ 'normals': 0.1, # TODO: attention! #1.0/0.03,
107
+ 'normals_can': 1.0, # 1.0/0.03,
108
+ 'uv_map': 10.0, # 1.0/0.001,
109
+ 'nocs': 1.0, # 1.0/0.0006,
110
+ }
111
+
112
+
113
+ def training_step(self, batch, batch_idx):
114
+
115
+
116
+ output, conf = self.net(batch)
117
+
118
+ B = output[list(output.keys())[0]].shape[0]
119
+ V = output[list(output.keys())[0]].shape[1]
120
+
121
+ c_map = None
122
+
123
+
124
+
125
+ losses = {}
126
+
127
+
128
+ if 'normals' in self.cfg.model.prediction_type:
129
+
130
+ gt_normals = batch['normals'].permute(0, 1, 4, 2, 3)
131
+ if conf is None:
132
+ losses['normals'] = (batch['tar_msk'].unsqueeze(2) * (gt_normals - output['normals'])).abs().mean()
133
+ else:
134
+ losses['normals'] = (batch['tar_msk'].unsqueeze(2) * (
135
+ c_map * (gt_normals - output['normals']) - self.alpha * torch.log(c_map))).abs().mean()
136
+
137
+ if self.cfg.model.pred_disentangled:
138
+ gt_normals_can = batch['normals_can'].permute(0, 1, 4, 2, 3)
139
+ if conf is None:
140
+ losses['normals_can'] = (
141
+ batch['tar_msk'].unsqueeze(2) * (gt_normals_can - output['normals_can'])).abs().mean()
142
+ else:
143
+ losses['normals_can'] = (batch['tar_msk'].unsqueeze(2) * (
144
+ c_map * (gt_normals_can - output['normals_can']) - self.alpha * torch.log(
145
+ c_map))).abs().mean()
146
+
147
+
148
+ for prediction_type in ['uv_map', 'depth', 'nocs']:
149
+ if prediction_type in self.cfg.model.prediction_type:
150
+ weight_mask = torch.ones_like(output[prediction_type])
151
+ if prediction_type == 'uv_map' or (prediction_type == 'nocs'): # ATTENTION: only for nocs?
152
+ weight_mask = batch['uv_masks'].unsqueeze(2).float() + 0.2
153
+ gt_pos_map = batch[prediction_type].permute(0, 1, 4, 2, 3)
154
+ if conf is None:
155
+ losses[prediction_type] = (weight_mask * batch['tar_msk'].unsqueeze(2) * (
156
+ gt_pos_map - output[prediction_type])).abs().mean()
157
+ else:
158
+ losses[prediction_type] = (weight_mask * batch['tar_msk'].unsqueeze(2) * (
159
+ c_map * (gt_pos_map - output[prediction_type]) - self.alpha * torch.log(
160
+ c_map))).abs().mean()
161
+
162
+ total_loss = 0
163
+
164
+ loss = 0
165
+ for k in losses.keys():
166
+ if k in self.loss_weights:
167
+ loss += self.loss_weights[k] * losses[k]
168
+ else:
169
+ loss += losses[k]
170
+
171
+
172
+
173
+ self.log(f'train/loss', loss.item(), prog_bar=False)
174
+ # for prediction_type in self.cfg.model.prediction_type:
175
+ for k in losses.keys():
176
+ if k in self.cfg.model.prediction_type:
177
+ self.log(f'train/loss_{k}', losses[k])
178
+ if self.cfg.model.pred_disentangled:
179
+ for k in losses.keys():
180
+ if k[:-4] in self.cfg.model.prediction_type:
181
+ self.log(f'train/loss_{k}', losses[k])
182
+
183
+
184
+ self.log('lr', self.trainer.optimizers[0].param_groups[0]['lr'])
185
+
186
+ do_vis = (0 == self.trainer.global_step % 300) if os.path.exists('/mnt/rohan') else (
187
+ 0 == self.trainer.global_step % 3000)
188
+ if do_vis and (self.trainer.local_rank == 0):
189
+ output, conf = self.net(batch)
190
+
191
+
192
+ self.vis_results({k: v.detach() for (k, v) in output.items()}, conf, batch, prex='train')
193
+ self.do_eval = True
194
+ torch.cuda.empty_cache()
195
+
196
+
197
+ return loss
198
+
199
+
200
+
201
+ def optimizer_step(
202
+ self,
203
+ *args, **kwargs
204
+ ):
205
+ """
206
+ Skipping updates in case of unstable gradients
207
+ https://github.com/Lightning-AI/lightning/issues/4956
208
+ """
209
+ valid_gradients = True
210
+ grads = [
211
+ param.grad.detach().flatten()
212
+ for param in self.parameters()
213
+ if param.grad is not None
214
+ ]
215
+ if len(grads) > 0:
216
+ norm = torch.cat(grads).norm()
217
+ self.log(f'grad/norm', norm.item(), prog_bar=False) # , sync_dist=True)
218
+
219
+ if (norm > 10000 and self.global_step > 20 or torch.isnan(norm)):
220
+ valid_gradients = False
221
+
222
+ if not valid_gradients:
223
+ print(
224
+ f'detected inf or nan values in gradients. not updating model parameters, OTHER FUNCTION threshold: {10000}, value: {norm.item()}')
225
+ self.zero_grad()
226
+ for param in self.parameters():
227
+ param.grad = None
228
+
229
+ L.LightningModule.optimizer_step(self, *args, **kwargs)
230
+
231
+
232
+ def validation_step(self, batch, batch_idx):
233
+
234
+
235
+ self.net.eval()
236
+ output, conf = self.net(batch)
237
+
238
+ B = output[list(output.keys())[0]].shape[0]
239
+ V = output[list(output.keys())[0]].shape[1]
240
+
241
+
242
+
243
+ loss_dict = {}
244
+
245
+ dataset_indices = {}
246
+
247
+
248
+
249
+ val_losses = {}
250
+ for prediction_type in ['uv_map', 'depth', 'nocs']:
251
+ if prediction_type in self.cfg.model.prediction_type:
252
+ gt_pos_map = batch[prediction_type].permute(0, 1, 4, 2, 3)
253
+ weight_mask = torch.ones_like(output[prediction_type])
254
+ if prediction_type == 'uv_map' or (prediction_type == 'nocs'): # ATTENTION: only for nocs?
255
+ weight_mask = batch['uv_masks'].unsqueeze(2).float() + 0.2
256
+
257
+ val_losses[prediction_type] = (weight_mask * batch['tar_msk'].unsqueeze(2) * (
258
+ gt_pos_map - output[prediction_type])).abs().mean()
259
+ loss_dict[f'loss/{prediction_type}'] = val_losses[prediction_type].item()
260
+
261
+ if 'normals' in self.cfg.model.prediction_type:
262
+ prediction_type = 'normals'
263
+ gt_pos_map = batch[prediction_type].permute(0, 1, 4, 2, 3)
264
+
265
+ val_losses[prediction_type] = (
266
+ batch['tar_msk'].unsqueeze(2) * (gt_pos_map - output[prediction_type])).abs().mean()
267
+
268
+ loss_dict[f'loss/{prediction_type}'] = val_losses[prediction_type].item()
269
+
270
+ if self.cfg.model.pred_disentangled:
271
+ prediction_type = 'normals_can'
272
+ gt_pos_map = batch[prediction_type].permute(0, 1, 4, 2, 3)
273
+
274
+ val_losses[prediction_type] = (
275
+ batch['tar_msk'].unsqueeze(2) * (gt_pos_map - output[prediction_type])).abs().mean()
276
+
277
+ loss_dict[f'loss/{prediction_type}'] = val_losses[prediction_type].item()
278
+
279
+ # if self.cfg.model.prediction_type == 'depth_si':
280
+ # loss, pred_scale, target_scale = simae2_loss(output, batch['depth'].permute(0, 1, 4, 2, 3), batch['tar_msk'].unsqueeze(2), c_map=c_map, alpha=self.alpha)
281
+ # self.validation_step_outputs.append({'loss': loss.item()})
282
+
283
+ val_loss = 0
284
+
285
+ for prediction_type in self.cfg.model.prediction_type:
286
+ val_loss += self.loss_weights[prediction_type] * val_losses[prediction_type]
287
+
288
+
289
+ loss_dict['loss/total'] = val_loss.item()
290
+ self.validation_step_outputs.append(loss_dict)
291
+
292
+ #print('GLOBAL_STEP:', self.trainer.global_step)
293
+ if self.do_eval and self.trainer.local_rank == 0:
294
+ output, conf = self.net(batch)
295
+ if conf is not None:
296
+ conf = conf.detach()
297
+ tmp_dict = {k: v.detach() for (k, v) in output.items()}
298
+ self.vis_results(tmp_dict, conf, batch, prex='val')
299
+ self.do_eval = False
300
+ torch.cuda.empty_cache()
301
+
302
+ return val_loss
303
+
304
+ def on_validation_epoch_end(self):
305
+ # for key in keys:
306
+ # prog_bar = True if key in ['psnr','mask','depth'] else False
307
+ metric_mean = np.stack([np.array(x['loss/total']) for x in self.validation_step_outputs]).mean()
308
+ self.log(f'val/loss', metric_mean, prog_bar=False, sync_dist=True)
309
+ if self.net.n_facial_components == 0:
310
+
311
+ for prediction_type in self.cfg.model.prediction_type:
312
+ metric_mean_pred_type = np.stack(
313
+ [np.array(x[f'loss/{prediction_type}']) for x in self.validation_step_outputs]).mean()
314
+ self.log(f'val/loss_{prediction_type}', metric_mean_pred_type, sync_dist=True)
315
+
316
+ for dataset_type in self.dataset_types:
317
+ for loss_type in self.validation_step_outputs[0].keys():
318
+ content = [np.array(x[dataset_type][loss_type]) for x in self.validation_step_outputs_per_dataset if loss_type in x[dataset_type]]
319
+ if len(content) > 0:
320
+ metric_mean = np.nanmean(np.stack(content))
321
+ self.log(f'val_{dataset_type}/{loss_type}', metric_mean, sync_dist=True)
322
+
323
+ self.validation_step_outputs.clear() # free memory
324
+ torch.cuda.empty_cache()
325
+
326
+ def vis_results(self, output, conf, batch, prex):
327
+ out_folder = f'{self.cfg.reconstruction_folder}/{prex}_{self.trainer.global_step}/'
328
+ os.makedirs(out_folder, exist_ok=True)
329
+ output_gpu = {k: v for k, v in output.items()}
330
+ output = {k: v.cpu() for k, v in output.items()}
331
+ if self.net.n_facial_components == 0:
332
+ output_rows = {}
333
+
334
+ for prediction_type in ['normals', 'albedo', 'uv_map', 'nocs']:
335
+ if prediction_type in self.cfg.model.prediction_type:
336
+ output_rows[prediction_type] = (batch['tar_msk'][..., None].float() * batch[prediction_type]).permute(0, 1, 4, 2, 3).detach().cpu()
337
+ if prediction_type in self.cfg.model.prediction_type and prediction_type == 'normals' and self.cfg.model.pred_disentangled:
338
+ output_rows['normals_can'] = (batch['tar_msk'][..., None].float() * batch['normals_can']).permute(0, 1, 4, 2, 3).detach().cpu()
339
+
340
+ gt_rgb = batch['tar_rgb'].permute(0, 1, 4, 2, 3).detach().cpu()
341
+
342
+
343
+ for i_batch in range(output_rows[self.cfg.model.prediction_type[0]].shape[0]):
344
+
345
+ modalities = []
346
+ prediction_types = self.cfg.model.prediction_type.copy() # ['pos_map', 'normals', 'albedo', 'uv_map']
347
+ if self.cfg.model.pred_disentangled and "pos_map" in prediction_types:
348
+ prediction_types.append('pos_map_can')
349
+ if self.cfg.model.pred_disentangled and "normals" in prediction_types:
350
+ prediction_types.append('normals_can')
351
+ if self.cfg.model.pred_disentangled and "uv_map" in prediction_types:
352
+ prediction_types.append('disps')
353
+
354
+ for prediction_type in prediction_types:
355
+ rows = []
356
+ for i_view in range(output_rows[prediction_type].shape[1]):
357
+ with torch.no_grad():
358
+ mini = min(output_rows[prediction_type][i_batch, i_view].min().item(),
359
+ output[prediction_type][i_batch, i_view].min().item())
360
+ tmp_gt_pos_map = output_rows[prediction_type][i_batch, i_view].clone() - mini
361
+ tmp_output = output[prediction_type][i_batch, i_view].clone() - mini
362
+ maxi = max(tmp_gt_pos_map.max().item(), tmp_output.max().item())
363
+ tmp_gt_pos_map = tmp_gt_pos_map / maxi
364
+ tmp_output = tmp_output / maxi
365
+
366
+ catted = [
367
+ gt_rgb[i_batch, i_view].permute(1, 2, 0).detach().cpu().numpy(),
368
+ pad_to_3_channels(
369
+ (batch['tar_msk'][i_batch, i_view].cpu() * tmp_gt_pos_map.cpu()).permute(1, 2,
370
+ 0).detach().cpu().numpy()),
371
+ pad_to_3_channels(tmp_output.permute(1, 2, 0).detach().cpu().float().numpy()),
372
+ ]
373
+
374
+ if conf is not None:
375
+ mini_conf = conf[i_batch, i_view].min()
376
+ tmp_conf = conf[i_batch, i_view].clone() - mini_conf
377
+ maxi_conf = tmp_conf.max()
378
+ tmp_conf = tmp_conf / maxi_conf
379
+ catted.append(
380
+ pad_to_3_channels(tmp_conf.permute(1, 2, 0).detach().cpu().float().numpy()))
381
+
382
+ catted = (np.concatenate(catted, axis=1) * 255).astype(np.uint8)
383
+
384
+ rows.append(catted)
385
+ modalities.append(np.concatenate(rows, axis=0))
386
+
387
+ catted = Image.fromarray(np.concatenate(modalities, axis=0))
388
+ scene_name = batch['meta']['scene'][i_batch]
389
+ catted.save(f'{out_folder}/{scene_name}.png') # , quality=90)
390
+
391
+
392
+
393
+
394
+ keys = list(output.keys())
395
+ for k in keys:
396
+ del output[k]
397
+ del output
398
+ del gt_rgb
399
+ keys = list(output_rows.keys())
400
+ for k in keys:
401
+ del output_rows[k]
402
+ del output_rows
403
+
404
+ torch.cuda.empty_cache()
405
+ # pll.show()
406
+
407
+ def num_steps(self) -> int:
408
+ """Get number of steps"""
409
+ # Accessing _data_source is flaky and might break
410
+ dataset = self.trainer.fit_loop._data_source.dataloader()
411
+ dataset_size = len(dataset)
412
+ num_devices = max(1, self.trainer.num_devices)
413
+ num_steps = dataset_size * self.trainer.max_epochs * self.cfg.train.limit_train_batches // (
414
+ self.trainer.accumulate_grad_batches * num_devices)
415
+ return int(num_steps)
416
+
417
+ def configure_optimizers(self):
418
+ decay_params, no_decay_params = [], []
419
+
420
+ invalid_params = []
421
+ all_backbone_params = []
422
+ all_non_backbone_params = []
423
+ backbone_params = []
424
+ backbone_params_no_decay = []
425
+ # add all bias and LayerNorm params to no_decay_params
426
+ for name, module in self.named_modules():
427
+ if name == 'flame' or name == 'flame_generic':
428
+ invalid_params.extend([p for p in module.parameters()])
429
+ else:
430
+ if isinstance(module, nn.LayerNorm):
431
+ if 'img_encoder' in name:
432
+ backbone_params_no_decay.extend([p for p in module.parameters()])
433
+ else:
434
+ no_decay_params.extend([p for p in module.parameters()])
435
+ elif hasattr(module, 'bias') and module.bias is not None:
436
+ if 'img_encoder' in name:
437
+ backbone_params_no_decay.append(module.bias)
438
+ else:
439
+ no_decay_params.append(module.bias)
440
+
441
+ if 'img_encoder' in name:
442
+ all_backbone_params.extend([p for p in module.parameters()])
443
+ else:
444
+ all_non_backbone_params.extend([p for p in module.parameters()])
445
+
446
+ # add remaining parameters to decay_params
447
+ _no_decay_ids = set(map(id, no_decay_params))
448
+ _all_backbone_ids = set(map(id, all_backbone_params))
449
+ _all_non_backbone_ids = set(map(id, all_non_backbone_params))
450
+ _backbone_no_decay_ids = set(map(id, backbone_params_no_decay))
451
+ _invalid_ids = set(map(id, invalid_params))
452
+ decay_params = [p for p in self.parameters() if
453
+ id(p) not in _no_decay_ids and id(p) not in _all_backbone_ids and id(p) not in _invalid_ids]
454
+ decay_params_backbone = [p for p in self.parameters() if
455
+ id(p) not in _backbone_no_decay_ids and id(p) not in _all_non_backbone_ids and id(
456
+ p) not in _invalid_ids]
457
+ no_decay_params = [p for p in no_decay_params if id(p) not in _invalid_ids]
458
+ no_decay_params_backbone = [p for p in backbone_params_no_decay if id(p) not in _invalid_ids]
459
+
460
+ # filter out parameters with no grad
461
+ decay_params = list(filter(lambda p: p.requires_grad, decay_params))
462
+ no_decay_params = list(filter(lambda p: p.requires_grad, no_decay_params))
463
+ decay_params_backbone = list(filter(lambda p: p.requires_grad, decay_params_backbone))
464
+ no_decay_params_backbone = list(filter(lambda p: p.requires_grad, no_decay_params_backbone))
465
+
466
+ # Optimizer
467
+ opt_groups = [
468
+ {'params': decay_params, 'weight_decay': self.cfg.train.weight_decay, 'lr': self.cfg.train.lr},
469
+ {'params': decay_params_backbone, 'weight_decay': self.cfg.train.weight_decay,
470
+ 'lr': self.cfg.train.lr_backbone},
471
+ {'params': no_decay_params, 'weight_decay': 0.0, 'lr': self.cfg.train.lr},
472
+ {'params': no_decay_params_backbone, 'weight_decay': 0.0, 'lr': self.cfg.train.lr_backbone},
473
+ ]
474
+ optimizer = torch.optim.AdamW(
475
+ opt_groups,
476
+ betas=(self.cfg.train.beta1, self.cfg.train.beta2),
477
+ )
478
+
479
+ total_global_batches = self.num_steps()
480
+
481
+ scheduler = CosineWarmupScheduler(
482
+ optimizer=optimizer,
483
+ warmup_iters=self.cfg.train.warmup_iters,
484
+ max_iters=total_global_batches,
485
+ )
486
+
487
+ return {"optimizer": optimizer,
488
+ "lr_scheduler": {
489
+ 'scheduler': scheduler,
490
+ 'interval': 'step' # or 'epoch' for epoch-level updates
491
+ }}
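`system.optimizer_step` above skips a parameter update whenever the global gradient norm explodes or turns NaN. Below is the same guard sketched for a plain (non-Lightning) training loop; the threshold mirrors the hard-coded 10000 above, and the surrounding loop is only indicated in comments.

```python
import torch

def drop_bad_gradients(model: torch.nn.Module, threshold: float = 1e4) -> bool:
    """Return True (and clear grads) if the global grad norm is NaN or above threshold."""
    grads = [p.grad.detach().flatten() for p in model.parameters() if p.grad is not None]
    if not grads:
        return False
    norm = torch.cat(grads).norm()
    if torch.isnan(norm) or norm > threshold:
        for p in model.parameters():
            p.grad = None
        return True
    return False

# sketch of use in a plain loop:
# loss.backward()
# if not drop_bad_gradients(model):
#     optimizer.step()
# optimizer.zero_grad(set_to_none=True)
```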
src/pixel3dmm/lightning/utils.py ADDED
@@ -0,0 +1,119 @@
1
+ import torch, os, json, math
2
+ import numpy as np
3
+ from torch.optim.lr_scheduler import LRScheduler
4
+
5
+ def getProjectionMatrix(znear, zfar, fovX, fovY):
6
+
7
+ tanHalfFovY = torch.tan((fovY / 2))
8
+ tanHalfFovX = torch.tan((fovX / 2))
9
+
10
+ P = torch.zeros(4, 4)
11
+
12
+ z_sign = 1.0
13
+
14
+ P[0, 0] = 1 / tanHalfFovX
15
+ P[1, 1] = 1 / tanHalfFovY
16
+ P[3, 2] = z_sign
17
+ P[2, 2] = z_sign * zfar / (zfar - znear)
18
+ P[2, 3] = -(zfar * znear) / (zfar - znear)
19
+ return P
20
+
21
+
22
+ class MiniCam:
23
+ def __init__(self, c2w, width, height, fovy, fovx, znear, zfar, device):
24
+ # c2w (pose) should be in NeRF convention.
25
+
26
+ self.image_width = width
27
+ self.image_height = height
28
+ self.FoVy = fovy
29
+ self.FoVx = fovx
30
+ self.znear = znear
31
+ self.zfar = zfar
32
+
33
+ w2c = torch.inverse(c2w)
34
+
35
+ # rectify...
36
+ # w2c[1:3, :3] *= -1
37
+ # w2c[:3, 3] *= -1
38
+
39
+ self.world_view_transform = w2c.transpose(0, 1).to(device)
40
+ self.projection_matrix = (
41
+ getProjectionMatrix(
42
+ znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy
43
+ )
44
+ .transpose(0, 1)
45
+ .to(device)
46
+ )
47
+ self.full_proj_transform = (self.world_view_transform @ self.projection_matrix).float()
48
+ self.camera_center = -c2w[:3, 3].to(device)
49
+
50
+
51
+ def rotation_matrix_to_quaternion(R):
52
+ tr = R[0, 0] + R[1, 1] + R[2, 2]
53
+ if tr > 0:
54
+ S = torch.sqrt(tr + 1.0) * 2.0
55
+ qw = 0.25 * S
56
+ qx = (R[2, 1] - R[1, 2]) / S
57
+ qy = (R[0, 2] - R[2, 0]) / S
58
+ qz = (R[1, 0] - R[0, 1]) / S
59
+ elif (R[0, 0] > R[1, 1]) and (R[0, 0] > R[2, 2]):
60
+ S = torch.sqrt(1.0 + R[0, 0] - R[1, 1] - R[2, 2]) * 2.0
61
+ qw = (R[2, 1] - R[1, 2]) / S
62
+ qx = 0.25 * S
63
+ qy = (R[0, 1] + R[1, 0]) / S
64
+ qz = (R[0, 2] + R[2, 0]) / S
65
+ elif R[1, 1] > R[2, 2]:
66
+ S = torch.sqrt(1.0 + R[1, 1] - R[0, 0] - R[2, 2]) * 2.0
67
+ qw = (R[0, 2] - R[2, 0]) / S
68
+ qx = (R[0, 1] + R[1, 0]) / S
69
+ qy = 0.25 * S
70
+ qz = (R[1, 2] + R[2, 1]) / S
71
+ else:
72
+ S = torch.sqrt(1.0 + R[2, 2] - R[0, 0] - R[1, 1]) * 2.0
73
+ qw = (R[1, 0] - R[0, 1]) / S
74
+ qx = (R[0, 2] + R[2, 0]) / S
75
+ qy = (R[1, 2] + R[2, 1]) / S
76
+ qz = 0.25 * S
77
+ return torch.stack([qw, qx, qy, qz], dim=1)
78
+
79
+ def rotate_quaternions(q, R):
80
+ # Convert quaternions to rotation matrices
81
+ q = torch.cat([q[:, :1], -q[:, 1:]], dim=1)
82
+ q = torch.cat([q[:, :3], q[:, 3:] * -1], dim=1)
83
+ rotated_R = torch.matmul(torch.matmul(q, R), q.inverse())
84
+
85
+ # Convert the rotated rotation matrices back to quaternions
86
+ return rotation_matrix_to_quaternion(rotated_R)
87
+
88
+ class WarmupScheduler(LRScheduler):
89
+ def __init__(self, optimizer, warmup_iters: int, max_iters: int, initial_lr: float = 1e-10, last_iter: int = -1):
90
+ self.warmup_iters = warmup_iters
91
+ self.max_iters = max_iters
92
+ self.initial_lr = initial_lr
93
+ super().__init__(optimizer, last_iter)
94
+
95
+ def get_lr(self):
96
+ return [
97
+ self.initial_lr + (base_lr - self.initial_lr) * min(self._step_count / self.warmup_iters, 1)
98
+ for base_lr in self.base_lrs]
99
+
100
+ # this function is borrowed from OpenLRM
101
+ class CosineWarmupScheduler(LRScheduler):
102
+ def __init__(self, optimizer, warmup_iters: int, max_iters: int, initial_lr: float = 1e-10, last_iter: int = -1):
103
+ self.warmup_iters = warmup_iters
104
+ self.max_iters = max_iters
105
+ self.initial_lr = initial_lr
106
+ super().__init__(optimizer, last_iter)
107
+
108
+ def get_lr(self):
109
+
110
+ if self._step_count <= self.warmup_iters:
111
+ return [
112
+ self.initial_lr + (base_lr - self.initial_lr) * self._step_count / self.warmup_iters
113
+ for base_lr in self.base_lrs]
114
+ else:
115
+ cos_iter = self._step_count - self.warmup_iters
116
+ cos_max_iter = self.max_iters - self.warmup_iters
117
+ cos_theta = cos_iter / cos_max_iter * math.pi
118
+ cos_lr = [base_lr * (1 + math.cos(cos_theta)) / 2 for base_lr in self.base_lrs]
119
+ return cos_lr
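`CosineWarmupScheduler` ramps the learning rate linearly for `warmup_iters` steps and then follows a cosine decay towards zero at `max_iters`. A minimal usage sketch with a throwaway parameter; the numbers are illustrative, and the scheduler is stepped once per optimizer step as in the Lightning config above.

```python
import torch
from pixel3dmm.lightning.utils import CosineWarmupScheduler

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.AdamW([param], lr=1e-4)
sched = CosineWarmupScheduler(opt, warmup_iters=10, max_iters=100)

lrs = []
for _ in range(100):
    opt.step()      # optimizer first, then scheduler (per-step schedule)
    sched.step()
    lrs.append(opt.param_groups[0]['lr'])

# lrs rises for ~10 steps, peaks near 1e-4, then decays towards 0
print(f'{max(lrs):.2e} -> {lrs[-1]:.2e}')
```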
src/pixel3dmm/preprocessing/__init__.py ADDED
File without changes
src/pixel3dmm/preprocessing/pipnet_utils.py ADDED
@@ -0,0 +1,348 @@
1
+ import importlib
2
+ import os
3
+ import torch.nn.parallel
4
+ import torch.utils.data
5
+ import torchvision.transforms as transforms
6
+
7
+
8
+ from pixel3dmm.preprocessing.PIPNet.FaceBoxesV2.faceboxes_detector import *
9
+ from pixel3dmm.preprocessing.PIPNet.lib.networks import *
10
+ from pixel3dmm.preprocessing.PIPNet.lib.functions import *
11
+ from pixel3dmm.preprocessing.PIPNet.lib.mobilenetv3 import mobilenetv3_large
12
+ from pixel3dmm import env_paths
13
+
14
+ def smooth(x, window_len=11, window='hanning'):
15
+ """smooth the data using a window with requested size.
16
+
17
+ This method is based on the convolution of a scaled window with the signal.
18
+ The signal is prepared by introducing reflected copies of the signal
19
+ (with the window size) in both ends so that transient parts are minimized
20
+ in the beginning and end part of the output signal.
21
+
22
+ input:
23
+ x: the input signal
24
+ window_len: the dimension of the smoothing window; should be an odd integer
25
+ window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
26
+ flat window will produce a moving average smoothing.
27
+
28
+ output:
29
+ the smoothed signal
30
+
31
+ example:
32
+
33
+ t=linspace(-2,2,0.1)
34
+ x=sin(t)+randn(len(t))*0.1
35
+ y=smooth(x)
36
+
37
+ see also:
38
+
39
+ numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
40
+ scipy.signal.lfilter
41
+
42
+ TODO: the window parameter could be the window itself if an array instead of a string
43
+ NOTE: length(output) != length(input), to correct this: return y[(window_len/2-1):-(window_len/2)] instead of just y.
44
+ """
45
+
46
+ if x.ndim != 1:
47
+ raise ValueError("smooth only accepts 1 dimension arrays.")
48
+
49
+ if x.size < window_len:
50
+ raise ValueError( "Input vector needs to be bigger than window size.")
51
+
52
+ if window_len < 3:
53
+ return x
54
+
55
+ if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
56
+ raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
57
+
58
+ s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
59
+ # print(len(s))
60
+ if window == 'flat': # moving average
61
+ w = np.ones(window_len, 'd')
62
+ else:
63
+ w = eval('np.' + window + '(window_len)')
64
+
65
+ y = np.convolve(w / w.sum(), s, mode='valid')
66
+ return y
67
+
68
+ def get_cstm_crop(image, detections, detections_max, max_bbox : bool = False):
69
+ #Image.fromarray(image).show()
70
+ image_width = image.shape[1]
71
+ image_height = image.shape[0]
72
+
73
+ det_box_scale = 1.42 #2.0#1.42
74
+ if detections[4]*1.42 * detections[5]*1.42 < detections_max[4] * 1.1 * detections_max[5] * 1.1:
75
+ detections = detections_max
76
+ det_box_scale = 1.1
77
+
78
+ det_xmin = detections[2]
79
+ det_ymin = detections[3]
80
+ det_width = detections[4]
81
+ det_height = detections[5]
82
+ if det_width > det_height:
83
+ det_ymin -= (det_width - det_height)//2
84
+ det_height = det_width
85
+ if det_width < det_height:
86
+ det_xmin -= (det_height - det_width)//2
87
+ det_width = det_height
88
+
89
+ det_xmax = det_xmin + det_width - 1
90
+ det_ymax = det_ymin + det_height - 1
91
+
92
+
93
+ det_xmin -= int(det_width * (det_box_scale - 1) / 2)
94
+ det_ymin -= int(det_height * (det_box_scale - 1) / 2)
95
+ det_xmax += int(det_width * (det_box_scale - 1) / 2)
96
+ det_ymax += int(det_height * (det_box_scale - 1) / 2)
97
+ if det_xmin < 0 or det_ymin < 0:
98
+ min_overflow = min(det_xmin, det_ymin)
99
+ det_xmin += -min_overflow
100
+ det_ymin += -min_overflow
101
+ if det_xmax > image_width -1 or det_ymax > image_height - 1:
102
+ max_overflow = max(det_xmax - image_width -1, det_ymax - image_height-1)
103
+ det_xmax -= max_overflow
104
+ det_ymax -= max_overflow
105
+
106
+ det_width = det_xmax - det_xmin + 1
107
+ det_height = det_ymax - det_ymin + 1
108
+ det_crop = image[det_ymin:det_ymax, det_xmin:det_xmax, :]
109
+ return det_crop, det_ymin, det_ymax, det_xmin, det_xmax
110
+ #Image.fromarray(det_crop).show()
111
+ #exit()
112
+
113
+
114
+ def demo_image(image_dir, pid, save_dir, preprocess, cfg, input_size, net_stride, num_nb, use_gpu, flip=False, start_frame=0,
115
+ vertical_crop : bool = False,
116
+ static_crop : bool = False,
117
+ max_bbox : bool = False,
118
+ disable_cropping : bool = False,
119
+ ):
120
+
121
+ if cfg.use_gpu:
122
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
123
+ else:
124
+ device = torch.device("cpu")
125
+
126
+ detector = FaceBoxesDetector('FaceBoxes', f'{env_paths.CODE_BASE}/src/pixel3dmm/preprocessing/PIPNet/FaceBoxesV2/weights/FaceBoxesV2.pth', use_gpu, device)
127
+ my_thresh = 0.6
128
+ det_box_scale = 1.2
129
+ meanface_indices, reverse_index1, reverse_index2, max_len = get_meanface(
130
+ os.path.join(f'{env_paths.CODE_BASE}/src/pixel3dmm/preprocessing/', 'PIPNet', 'data', cfg.data_name, 'meanface.txt'), cfg.num_nb)
131
+
132
+ if cfg.backbone == 'resnet18':
133
+ resnet18 = models.resnet18(pretrained=cfg.pretrained)
134
+ net = Pip_resnet18(resnet18, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size,
135
+ net_stride=cfg.net_stride)
136
+ elif cfg.backbone == 'resnet50':
137
+ resnet50 = models.resnet50(pretrained=cfg.pretrained)
138
+ net = Pip_resnet50(resnet50, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size,
139
+ net_stride=cfg.net_stride)
140
+ elif cfg.backbone == 'resnet101':
141
+ resnet101 = models.resnet101(pretrained=cfg.pretrained)
142
+ net = Pip_resnet101(resnet101, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size,
143
+ net_stride=cfg.net_stride)
144
+ elif cfg.backbone == 'mobilenet_v2':
145
+ mbnet = models.mobilenet_v2(pretrained=cfg.pretrained)
146
+ net = Pip_mbnetv2(mbnet, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size, net_stride=cfg.net_stride)
147
+ elif cfg.backbone == 'mobilenet_v3':
148
+ mbnet = mobilenetv3_large()
149
+ if cfg.pretrained:
150
+ mbnet.load_state_dict(torch.load('lib/mobilenetv3-large-1cd25616.pth'))
151
+ net = Pip_mbnetv3(mbnet, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size, net_stride=cfg.net_stride)
152
+ else:
153
+ print('No such backbone!')
154
+ exit(0)
155
+
156
+
157
+ net = net.to(device)
158
+
159
+ weight_file = os.path.join(save_dir, 'epoch%d.pth' % (cfg.num_epochs - 1))
160
+ state_dict = torch.load(weight_file, map_location=device)
161
+ net.load_state_dict(state_dict)
162
+ net.eval()
163
+
164
+ if start_frame > 0:
165
+ files = [f for f in os.listdir(f'{image_dir}/') if (f.endswith('.jpg') or f.endswith('.png')) and (((int(f.split('_')[-1].split('.')[0]) - start_frame) % 3) == 0)]
166
+ else:
167
+ files = [f for f in os.listdir(f'{image_dir}/') if f.endswith('.jpg') or f.endswith('.png')]
168
+ files.sort()
169
+
170
+ if not vertical_crop:
171
+ all_detections = []
172
+ all_images = []
173
+ #all_normals = []
174
+ succ_files = []
175
+ for file_name in files:
176
+ image = cv2.imread(f'{image_dir}/{file_name}')
177
+ #normals = cv2.imread(f'{image_dir}/../normals/{file_name[:-4]}.png')
178
+
179
+ if len(image.shape) < 3 or image.shape[-1] != 3:
180
+ continue
181
+
182
+ image_height, image_width, _ = image.shape
183
+
184
+
185
+
186
+ detections, _ = detector.detect(image, my_thresh, 1)
187
+ dets_filtered = [det for det in detections if det[0] == 'face']
188
+ dets_filtered.sort(key=lambda x: -1 * x[1])
189
+ detections = dets_filtered
190
+ if detections[0][1] < 0.75:
191
+ raise ValueError("Found face with too low detections confidence as max confidence")
192
+ all_detections.append(detections[0])
193
+ all_images.append(image)
194
+ #all_normals.append(normals)
195
+ succ_files.append(file_name)
196
+
197
+ assert static_crop, 'Other options currently not supported anymore'
198
+ if static_crop:
199
+ #if max_bbox:
200
+ det1_max = np.min(np.array([x[2] for x in all_detections]), axis=0)
201
+ det2_max = np.min(np.array([x[3] for x in all_detections]), axis=0)
202
+ det3_max = np.max(np.array([x[4]+x[2]-det1_max for x in all_detections]), axis=0)
203
+ det4_max = np.max(np.array([x[5]+x[3]-det2_max for x in all_detections]), axis=0)
204
+ det1 = np.mean(np.array([x[2] for x in all_detections]), axis=0)
205
+ det2 = np.mean(np.array([x[3] for x in all_detections]), axis=0)
206
+ det3 = np.mean(np.array([x[4] for x in all_detections]), axis=0)
207
+ det4 = np.mean(np.array([x[5] for x in all_detections]), axis=0)
208
+
209
+ det_smoothed = np.stack([det1, det2, det3, det4], axis=0).astype(np.int32)
210
+ det_smoothed_max = np.stack([det1_max, det2_max, det3_max, det4_max], axis=0).astype(np.int32)
211
+ all_detections_smoothed = [] # = [[x[0], x[1], x_smoothed[0], x_smoothed[1], x_smoothed[2], x_smoothed[3]] for x, x_smoothed in zip()]
212
+ all_detections_max_smoothed = [] # = [[x[0], x[1], x_smoothed[0], x_smoothed[1], x_smoothed[2], x_smoothed[3]] for x, x_smoothed in zip()]
213
+ for i, det in enumerate(all_detections):
214
+ all_detections_smoothed.append(
215
+ [det[0], det[1], det_smoothed[0], det_smoothed[1], det_smoothed[2], det_smoothed[3]])
216
+ all_detections_max_smoothed.append(
217
+ [det[0], det[1], det_smoothed_max[0], det_smoothed_max[1], det_smoothed_max[2], det_smoothed_max[3]])
218
+ all_detections = all_detections_smoothed
219
+ all_detections_max = all_detections_max_smoothed
220
+ else:
221
+ if len(all_detections) > 11:
222
+ WINDOW_LENGTH = 11
223
+ det1 = smooth(np.array([x[2] for x in all_detections]), window_len=WINDOW_LENGTH)
224
+ det2 = smooth(np.array([x[3] for x in all_detections]), window_len=WINDOW_LENGTH)
225
+ det3 = smooth(np.array([x[4] for x in all_detections]), window_len=WINDOW_LENGTH)
226
+ det4 = smooth(np.array([x[5] for x in all_detections]), window_len=WINDOW_LENGTH)
227
+ det_smoothed = np.stack([det1, det2,det3,det4], axis=1).astype(np.int32)
228
+ all_detections_smoothed = [] #= [[x[0], x[1], x_smoothed[0], x_smoothed[1], x_smoothed[2], x_smoothed[3]] for x, x_smoothed in zip()]
229
+ for i, det in enumerate(all_detections):
230
+ all_detections_smoothed.append([det[0], det[1], det_smoothed[i, 0], det_smoothed[i, 1], det_smoothed[i, 2], det_smoothed[i, 3]])
231
+ all_detections = all_detections_smoothed
232
+ # TODO: smooth detections!!!
233
+ for file_name, detection, detection_max, image in zip(succ_files, all_detections, all_detections_max, all_images):
234
+
235
+ if not disable_cropping:
236
+ img_crop, det_ymin, det_ymax, det_xmin, det_xmax = get_cstm_crop(image, detection, detection_max, max_bbox=max_bbox)
237
+ #n_crop = get_cstm_crop(normals, detection)
238
+ image = img_crop
239
+ # save cropped image
240
+ os.makedirs(f'{image_dir}/../cropped/', exist_ok=True)
241
+ #os.makedirs(f'{image_dir}/../cropped_normals/', exist_ok=True)
242
+ cv2.imwrite(f'{image_dir}/../cropped/{file_name}', cv2.resize(image, (512, 512)))
243
+ #cv2.imwrite(f'{image_dir}/../cropped_normals/{file_name[:-4]}.png', cv2.resize(n_crop, (512, 512)))
244
+
245
+ # store cropping information:
246
+ if not os.path.exists(f'{image_dir}/../crop_ymin_ymax_xmin_xmax.npy'):
247
+ np.save(f'{image_dir}/../crop_ymin_ymax_xmin_xmax.npy', np.array([det_ymin, det_ymax, det_xmin, det_xmax]))
248
+ else:
249
+ for file_name in files:
250
+ image = cv2.imread(f'{image_dir}/{file_name}')
251
+ if image.shape[0] != image.shape[1]:
252
+ image = image[220:-220, 640:-640, :]
253
+ os.makedirs(f'{image_dir}/../cropped/', exist_ok=True)
254
+ cv2.imwrite(f'{image_dir}/../cropped/{file_name}', cv2.resize(image, (512, 512)))
255
+
256
+
257
+ # run landmark detection
258
+ lms = []
259
+ image_dir = f'{image_dir}/../cropped/'
260
+ for file_name in files:
261
+ image = cv2.imread(f'{image_dir}/{file_name}')
262
+
263
+ if len(image.shape) < 3 or image.shape[-1] != 3:
264
+ continue
265
+ if flip:
266
+ image = cv2.transpose(image)
267
+
268
+ image_height, image_width, _ = image.shape
269
+ detections, _ = detector.detect(image, my_thresh, 1)
270
+ pred_export = None
271
+ dets_filtered = [det for det in detections if det[0] == 'face']
272
+ dets_filtered.sort(key=lambda x: -1 * x[1])
273
+ detections = dets_filtered
274
+
275
+
276
+ print(detections)
277
+ for i in range(min(1, len(detections))):
278
+ if detections[i][1] < 0.99:
279
+ continue
280
+ det_xmin = detections[i][2]
281
+ det_ymin = detections[i][3]
282
+ det_width = detections[i][4]
283
+ det_height = detections[i][5]
284
+ det_xmax = det_xmin + det_width - 1
285
+ det_ymax = det_ymin + det_height - 1
286
+
287
+
288
+ det_xmin -= int(det_width * (det_box_scale - 1) / 2)
289
+ # remove a part of top area for alignment, see paper for details
290
+ det_ymin += int(det_height * (det_box_scale - 1) / 2)
291
+ det_xmax += int(det_width * (det_box_scale - 1) / 2)
292
+ det_ymax += int(det_height * (det_box_scale - 1) / 2)
293
+ det_xmin = max(det_xmin, 0)
294
+ det_ymin = max(det_ymin, 0)
295
+ det_xmax = min(det_xmax, image_width - 1)
296
+ det_ymax = min(det_ymax, image_height - 1)
297
+ det_width = det_xmax - det_xmin + 1
298
+ det_height = det_ymax - det_ymin + 1
299
+ cv2.rectangle(image, (det_xmin, det_ymin), (det_xmax, det_ymax), (0, 0, 255), 2)
300
+ det_crop = image[det_ymin:det_ymax, det_xmin:det_xmax, :]
301
+ #np.save(f'{CROP_DIR}/{pid[:-4]}.npy', np.array([det_ymin, det_ymax, det_xmin, det_xmax]))
302
+ det_crop = cv2.resize(det_crop, (input_size, input_size))
303
+ inputs = Image.fromarray(det_crop[:, :, ::-1].astype('uint8'), 'RGB')
304
+ #inputs.show()
305
+ inputs = preprocess(inputs).unsqueeze(0)
306
+ inputs = inputs.to(device)
307
+ lms_pred_x, lms_pred_y, lms_pred_nb_x, lms_pred_nb_y, outputs_cls, max_cls = forward_pip(net,
308
+ inputs,
309
+ preprocess,
310
+ input_size,
311
+ net_stride,
312
+ num_nb)
313
+ lms_pred = torch.cat((lms_pred_x, lms_pred_y), dim=1).flatten()
314
+ tmp_nb_x = lms_pred_nb_x[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
315
+ tmp_nb_y = lms_pred_nb_y[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
316
+ tmp_x = torch.mean(torch.cat((lms_pred_x, tmp_nb_x), dim=1), dim=1).view(-1, 1)
317
+ tmp_y = torch.mean(torch.cat((lms_pred_y, tmp_nb_y), dim=1), dim=1).view(-1, 1)
318
+ lms_pred_merge = torch.cat((tmp_x, tmp_y), dim=1).flatten()
319
+ lms_pred = lms_pred.cpu().numpy()
320
+ lms_pred_merge = lms_pred_merge.cpu().numpy()
321
+ pred_export = np.zeros([cfg.num_lms, 2])
322
+ for i in range(cfg.num_lms):
323
+ x_pred = lms_pred_merge[i * 2] * det_width
324
+ y_pred = lms_pred_merge[i * 2 + 1] * det_height
325
+ pred_export[i, 0] = (x_pred + det_xmin) / image_width
326
+ pred_export[i, 1] = (y_pred + det_ymin) / image_height
327
+ cv2.circle(image, (int(x_pred) + det_xmin, int(y_pred) + det_ymin), 1, (0, 0, 255), 2)
328
+ if i == 76:
329
+ cv2.circle(image, (int(x_pred) + det_xmin, int(y_pred) + det_ymin), 1, (255, 0, 0), 2)
330
+
331
+ if pred_export is not None:
332
+ print('exporting stuff to ' + image_dir)
333
+ landmark_dir = f'{image_dir}/../PIPnet_landmarks/'
334
+ os.makedirs(landmark_dir, exist_ok=True)
335
+ np.save(landmark_dir + f'/{file_name[:-4]}.npy', pred_export)
336
+ lms.append(pred_export)
337
+ exp_dir = image_dir + '/../PIPnet_annotated_images/'
338
+ os.makedirs(exp_dir, exist_ok=True)
339
+ cv2.imwrite(exp_dir + f'/{file_name}', image)
340
+
341
+ # cv2.imshow('1', image)
342
+ # cv2.waitKey(0)
343
+
344
+ lms = np.stack(lms, axis=0)
345
+ os.makedirs(f'{image_dir}/../pipnet', exist_ok=True)
346
+ np.save(f'{image_dir}/../pipnet/test.npy', lms)
347
+
348
+
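In its non-static branch, `demo_image` smooths the per-frame face-box coordinates with the hanning-window `smooth` helper (WINDOW_LENGTH=11) before cropping. Below is a standalone rerun of that smoothing on a noisy 1-D signal; note, as the docstring says, that the output is `window_len - 1` samples longer than the input.

```python
import numpy as np

def hanning_smooth(x: np.ndarray, window_len: int = 11) -> np.ndarray:
    # reflect-pad both ends, then convolve with a normalized hanning window
    s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
    w = np.hanning(window_len)
    return np.convolve(w / w.sum(), s, mode='valid')

x = np.sin(np.linspace(0, 4, 200)) + np.random.default_rng(0).normal(0, 0.1, 200)
y = hanning_smooth(x)
print(len(x), len(y))  # 200 210 -> output is window_len - 1 samples longer
```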
src/pixel3dmm/preprocessing/replacement_code/__init__.py ADDED
File without changes
src/pixel3dmm/preprocessing/replacement_code/facer_transform.py ADDED
@@ -0,0 +1,397 @@
1
+ from typing import List, Dict, Callable, Tuple, Optional
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import functools
5
+ import numpy as np
6
+
7
+
8
+ def get_crop_and_resize_matrix(
9
+ box: torch.Tensor, target_shape: Tuple[int, int],
10
+ target_face_scale: float = 1.0, make_square_crop: bool = True,
11
+ offset_xy: Optional[Tuple[float, float]] = None, align_corners: bool = True,
12
+ offset_box_coords: bool = False) -> torch.Tensor:
13
+ """
14
+ Args:
15
+ box: b x 4(x1, y1, x2, y2)
16
+ align_corners (bool): Set this to `True` only if the box you give has coordinates
17
+ ranging from `0` to `h-1` or `w-1`.
18
+
19
+ offset_box_coords (bool): Set this to `True` if the box you give has coordinates
20
+ ranging from `0` to `h` or `w`.
21
+
22
+ Set this to `False` if the box coordinates range from `-0.5` to `h-0.5` or `w-0.5`.
23
+
24
+ If the box coordinates range from `0` to `h-1` or `w-1`, set `align_corners=True`.
25
+
26
+ Returns:
27
+ torch.Tensor: b x 3 x 3.
28
+ """
29
+ if offset_xy is None:
30
+ offset_xy = (0.0, 0.0)
31
+
32
+ x1, y1, x2, y2 = box.split(1, dim=1) # b x 1
33
+ cx = (x1 + x2) / 2 + offset_xy[0]
34
+ cy = (y1 + y2) / 2 + offset_xy[1]
35
+ rx = (x2 - x1) / 2 / target_face_scale
36
+ ry = (y2 - y1) / 2 / target_face_scale
37
+ if make_square_crop:
38
+ rx = ry = torch.maximum(rx, ry)
39
+
40
+ x1, y1, x2, y2 = cx - rx, cy - ry, cx + rx, cy + ry
41
+
42
+ h, w, *_ = target_shape
43
+
44
+ zeros_pl = torch.zeros_like(x1)
45
+ ones_pl = torch.ones_like(x1)
46
+
47
+ if align_corners:
48
+ # x -> (x - x1) / (x2 - x1) * (w - 1)
49
+ # y -> (y - y1) / (y2 - y1) * (h - 1)
50
+ ax = 1.0 / (x2 - x1) * (w - 1)
51
+ ay = 1.0 / (y2 - y1) * (h - 1)
52
+ matrix = torch.cat([
53
+ ax, zeros_pl, -x1 * ax,
54
+ zeros_pl, ay, -y1 * ay,
55
+ zeros_pl, zeros_pl, ones_pl
56
+ ], dim=1).reshape(-1, 3, 3) # b x 3 x 3
57
+ else:
58
+ if offset_box_coords:
59
+ # x1, x2 \in [0, w], y1, y2 \in [0, h]
60
+ # first we should offset x1, x2, y1, y2 to be ranging in
61
+ # [-0.5, w-0.5] and [-0.5, h-0.5]
62
+ # so to convert these pixel coordinates into boundary coordinates.
63
+ x1, x2, y1, y2 = x1-0.5, x2-0.5, y1-0.5, y2-0.5
64
+
65
+ # x -> (x - x1) / (x2 - x1) * w - 0.5
66
+ # y -> (y - y1) / (y2 - y1) * h - 0.5
67
+ ax = 1.0 / (x2 - x1) * w
68
+ ay = 1.0 / (y2 - y1) * h
69
+ matrix = torch.cat([
70
+ ax, zeros_pl, -x1 * ax - 0.5*ones_pl,
71
+ zeros_pl, ay, -y1 * ay - 0.5*ones_pl,
72
+ zeros_pl, zeros_pl, ones_pl
73
+ ], dim=1).reshape(-1, 3, 3) # b x 3 x 3
74
+ return matrix
75
+
76
+
77
+ def get_similarity_transform_matrix(
78
+ from_pts: torch.Tensor, to_pts: torch.Tensor) -> torch.Tensor:
79
+ """
80
+ Args:
81
+ from_pts, to_pts: b x n x 2
82
+
83
+ Returns:
84
+ torch.Tensor: b x 3 x 3
85
+ """
86
+ mfrom = from_pts.mean(dim=1, keepdim=True) # b x 1 x 2
87
+ mto = to_pts.mean(dim=1, keepdim=True) # b x 1 x 2
88
+
89
+ a1 = (from_pts - mfrom).square().sum([1, 2], keepdim=False) # b
90
+ c1 = ((to_pts - mto) * (from_pts - mfrom)).sum([1, 2], keepdim=False) # b
91
+
92
+ to_delta = to_pts - mto
93
+ from_delta = from_pts - mfrom
94
+ c2 = (to_delta[:, :, 0] * from_delta[:, :, 1] - to_delta[:,
95
+ :, 1] * from_delta[:, :, 0]).sum([1], keepdim=False) # b
96
+
97
+ a = c1 / a1
98
+ b = c2 / a1
99
+ dx = mto[:, 0, 0] - a * mfrom[:, 0, 0] - b * mfrom[:, 0, 1] # b
100
+ dy = mto[:, 0, 1] + b * mfrom[:, 0, 0] - a * mfrom[:, 0, 1] # b
101
+
102
+ ones_pl = torch.ones_like(a1)
103
+ zeros_pl = torch.zeros_like(a1)
104
+
105
+ return torch.stack([
106
+ a, b, dx,
107
+ -b, a, dy,
108
+ zeros_pl, zeros_pl, ones_pl,
109
+ ], dim=-1).reshape(-1, 3, 3)
110
+
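+ # Sanity-check sketch (illustrative): the returned matrix, applied to `from_pts` in
+ # homogeneous coordinates, maps them onto `to_pts` when the two point sets are related
+ # by an exact similarity transform.
+ def _example_similarity_transform():
+     from_pts = torch.rand(2, 5, 2) * 100
+     angle = torch.tensor(0.3)
+     rot = torch.stack([torch.stack([torch.cos(angle), -torch.sin(angle)]),
+                        torch.stack([torch.sin(angle), torch.cos(angle)])])
+     to_pts = 2.0 * from_pts @ rot.T + torch.tensor([10.0, -5.0])
+     m = get_similarity_transform_matrix(from_pts, to_pts)  # b x 3 x 3
+     homo = torch.cat([from_pts, torch.ones_like(from_pts[..., :1])], dim=-1)
+     mapped = torch.bmm(homo, m.transpose(1, 2))[..., :2]
+     assert torch.allclose(mapped, to_pts, atol=1e-2)
+     return m
+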
111
+
112
+ @functools.lru_cache()
113
+ def _standard_face_pts():
114
+ pts = torch.tensor([
115
+ 196.0, 226.0,
116
+ 316.0, 226.0,
117
+ 256.0, 286.0,
118
+ 220.0, 360.4,
119
+ 292.0, 360.4], dtype=torch.float32) / 256.0 - 1.0
120
+ return torch.reshape(pts, (5, 2))
121
+
122
+
123
+ def get_face_align_matrix(
124
+ face_pts: torch.Tensor, target_shape: Tuple[int, int],
125
+ target_face_scale: float = 1.0, offset_xy: Optional[Tuple[float, float]] = None,
126
+ target_pts: Optional[torch.Tensor] = None):
127
+
128
+ if target_pts is None:
129
+ with torch.no_grad():
130
+ std_pts = _standard_face_pts().to(face_pts) # [-1 1]
131
+ h, w, *_ = target_shape
132
+ target_pts = (std_pts * target_face_scale + 1) * \
133
+ torch.tensor([w-1, h-1]).to(face_pts) / 2.0
134
+ if offset_xy is not None:
135
+ target_pts[:, 0] += offset_xy[0]
136
+ target_pts[:, 1] += offset_xy[1]
137
+ else:
138
+ target_pts = target_pts.to(face_pts)
139
+
140
+ if target_pts.dim() == 2:
141
+ target_pts = target_pts.unsqueeze(0)
142
+ if target_pts.size(0) == 1:
143
+ target_pts = target_pts.broadcast_to(face_pts.shape)
144
+
145
+ assert target_pts.shape == face_pts.shape
146
+
147
+ return get_similarity_transform_matrix(face_pts, target_pts)
148
+
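+ # Minimal usage sketch (illustrative; the landmark values are made-up placeholders):
+ # 5-point landmarks (left eye, right eye, nose tip, left/right mouth corner) in pixel
+ # coordinates to an alignment matrix for a 448 x 448 target crop.
+ def _example_face_align_matrix():
+     landmarks = torch.tensor([[[210.0, 180.0], [290.0, 178.0], [252.0, 230.0],
+                                [222.0, 280.0], [284.0, 282.0]]])  # b x 5 x 2
+     return get_face_align_matrix(landmarks, target_shape=(448, 448), target_face_scale=1.0)
+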
149
+
150
+ def rot90(v):
151
+ return np.array([-v[1], v[0]])
152
+
153
+
154
+ def get_quad(lm: torch.Tensor):
155
+ # N,2
156
+ lm = lm.detach().cpu().numpy()
157
+ # Choose oriented crop rectangle.
158
+ eye_avg = (lm[0] + lm[1]) * 0.5 + 0.5
159
+ mouth_avg = (lm[3] + lm[4]) * 0.5 + 0.5
160
+ eye_to_eye = lm[1] - lm[0]
161
+ eye_to_mouth = mouth_avg - eye_avg
162
+ x = eye_to_eye - rot90(eye_to_mouth)
163
+ x /= np.hypot(*x)
164
+ x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
165
+ y = rot90(x)
166
+ c = eye_avg + eye_to_mouth * 0.1
167
+ quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
168
+ quad_for_coeffs = quad[[0, 3, 2, 1]]  # reorder the corners
169
+ return torch.from_numpy(quad_for_coeffs).float()
170
+
171
+
172
+ def get_face_align_matrix_celebm(
173
+ face_pts: torch.Tensor, target_shape: Tuple[int, int], bbox_scale_factor: float = 1.0):
174
+
175
+ face_pts = torch.stack([get_quad(pts) for pts in face_pts], dim=0).to(face_pts)
176
+ face_mean = face_pts.mean(axis=1).unsqueeze(1)
177
+ diff = face_pts - face_mean
178
+ face_pts = face_mean + torch.tensor([[[1.5, 1.5]]], device=diff.device)*diff
179
+ assert target_shape[0] == target_shape[1]
180
+ diagonal = torch.norm(face_pts[:, 0, :] - face_pts[:, 2, :], dim=-1)
181
+ min_bbox_size = 350
182
+ max_bbox_size = 500
183
+ bbox_scale_factor = bbox_scale_factor + torch.clamp((max_bbox_size-diagonal)/(max_bbox_size-min_bbox_size), 0, 1)
184
+ # print(bbox_scale_factor)
185
+ target_size = target_shape[0]/bbox_scale_factor
186
+ #target_pts = torch.as_tensor([[0, 0], [target_size,0], [target_size, target_size], [0, target_size]]).to(face_pts)
187
+ target_ptss = []
188
+ for tidx in range(target_size.shape[0]):
189
+ target_pts = torch.as_tensor([[0, 0], [target_size[tidx],0], [target_size[tidx], target_size[tidx]], [0, target_size[tidx]]]).to(face_pts)
190
+ target_pts += int( (target_shape[0]-target_size[tidx])/2 )
191
+ target_ptss.append(target_pts)
192
+ target_pts = torch.stack(target_ptss, dim=0)
193
+
194
+ #if target_pts.dim() == 2:
195
+ # target_pts = target_pts.unsqueeze(0)
196
+ #if target_pts.size(0) == 1:
197
+ # target_pts = target_pts.broadcast_to(face_pts.shape)
198
+
199
+ assert target_pts.shape == face_pts.shape
200
+
201
+ return get_similarity_transform_matrix(face_pts, target_pts)
202
+
203
+ @functools.lru_cache(maxsize=128)
204
+ def _meshgrid(h, w) -> Tuple[torch.Tensor, torch.Tensor]:
205
+ yy, xx = torch.meshgrid(torch.arange(h).float(),
206
+ torch.arange(w).float(),
207
+ indexing='ij')
208
+ return yy, xx
209
+
210
+
211
+ def _forge_grid(batch_size: int, device: torch.device,
212
+ output_shape: Tuple[int, int],
213
+ fn: Callable[[torch.Tensor], torch.Tensor]
214
+ ) -> torch.Tensor:
215
+ """ Forge transform maps with a given function `fn`.
216
+
217
+ Args:
218
+ output_shape (tuple): (h, w, ...); only the leading (h, w) is used.
219
+ fn (Callable[[torch.Tensor], torch.Tensor]): The function that accepts
220
+ a bxnx2 array and outputs the transformed bxnx2 array. Both input
221
+ and output store (x, y) coordinates.
222
+
223
+ Note:
224
+ both input and output arrays of `fn` store (x, y) coordinates.
225
+
226
+ Returns:
227
+ torch.Tensor: a b x h x w x 2 map `out` such that, for each
228
+ pixel location (y, x),
229
+ `out[:, y, x] = fn([x, y])`.
230
+ """
231
+ h, w, *_ = output_shape
232
+ yy, xx = _meshgrid(h, w) # h x w
233
+ yy = yy.unsqueeze(0).broadcast_to(batch_size, h, w).to(device)
234
+ xx = xx.unsqueeze(0).broadcast_to(batch_size, h, w).to(device)
235
+
236
+ in_xxyy = torch.stack(
237
+ [xx, yy], dim=-1).reshape([batch_size, h*w, 2]) # (h x w) x 2
238
+ out_xxyy: torch.Tensor = fn(in_xxyy) # (h x w) x 2
239
+ return out_xxyy.reshape(batch_size, h, w, 2)
240
+
241
+
242
+ def _safe_arctanh(x: torch.Tensor, eps: float = 0.001) -> torch.Tensor:
243
+ return torch.clamp(x, -1+eps, 1-eps).arctanh()
244
+
245
+
246
+ def inverted_tanh_warp_transform(coords: torch.Tensor, matrix: torch.Tensor,
247
+ warp_factor: float, warped_shape: Tuple[int, int]):
248
+ """ Inverted tanh-warp function.
249
+
250
+ Args:
251
+ coords (torch.Tensor): b x n x 2 (x, y). The transformed coordinates.
252
+ matrix: b x 3 x 3. A matrix that transforms un-normalized coordinates
253
+ from the original image to the aligned yet not-warped image.
254
+ warp_factor (float): The warp factor.
255
+ 0 means linear transform, 1 means full tanh warp.
256
+ warped_shape (tuple): [height, width].
257
+
258
+ Returns:
259
+ torch.Tensor: b x n x 2 (x, y). The original coordinates.
260
+ """
261
+ h, w, *_ = warped_shape
262
+ # h -= 1
263
+ # w -= 1
264
+
265
+ w_h = torch.tensor([[w, h]]).to(coords)
266
+
267
+ if warp_factor > 0:
268
+ # normalize coordinates to [-1, +1]
269
+ coords = coords / w_h * 2 - 1
270
+
271
+ nl_part1 = coords > 1.0 - warp_factor
272
+ nl_part2 = coords < -1.0 + warp_factor
273
+
274
+ ret_nl_part1 = _safe_arctanh(
275
+ (coords - 1.0 + warp_factor) /
276
+ warp_factor) * warp_factor + \
277
+ 1.0 - warp_factor
278
+ ret_nl_part2 = _safe_arctanh(
279
+ (coords + 1.0 - warp_factor) /
280
+ warp_factor) * warp_factor - \
281
+ 1.0 + warp_factor
282
+
283
+ coords = torch.where(nl_part1, ret_nl_part1,
284
+ torch.where(nl_part2, ret_nl_part2, coords))
285
+
286
+ # denormalize
287
+ coords = (coords + 1) / 2 * w_h
288
+
289
+ coords_homo = torch.cat(
290
+ [coords, torch.ones_like(coords[:, :, [0]])], dim=-1) # b x n x 3
291
+
292
+ inv_matrix = torch.linalg.inv(matrix) # b x 3 x 3
293
+ # inv_matrix = np.linalg.inv(matrix)
294
+ coords_homo = torch.bmm(
295
+ coords_homo, inv_matrix.permute(0, 2, 1)) # b x n x 3
296
+ return coords_homo[:, :, :2] / coords_homo[:, :, [2, 2]]
297
+
298
+
299
+ def tanh_warp_transform(
300
+ coords: torch.Tensor, matrix: torch.Tensor,
301
+ warp_factor: float, warped_shape: Tuple[int, int]):
302
+ """ Tanh-warp function.
303
+
304
+ Args:
305
+ coords (torch.Tensor): b x n x 2 (x, y). The original coordinates.
306
+ matrix: b x 3 x 3. A matrix that transforms un-normalized coordinates
307
+ from the original image to the aligned yet not-warped image.
308
+ warp_factor (float): The warp factor.
309
+ 0 means linear transform, 1 means full tanh warp.
310
+ warped_shape (tuple): [height, width].
311
+
312
+ Returns:
313
+ torch.Tensor: b x n x 2 (x, y). The transformed coordinates.
314
+ """
315
+ h, w, *_ = warped_shape
316
+ # h -= 1
317
+ # w -= 1
318
+ w_h = torch.tensor([[w, h]]).to(coords)
319
+
320
+ coords_homo = torch.cat(
321
+ [coords, torch.ones_like(coords[:, :, [0]])], dim=-1) # b x n x 3
322
+
323
+ coords_homo = torch.bmm(coords_homo, matrix.transpose(2, 1)) # b x n x 3
324
+ coords = (coords_homo[:, :, :2] / coords_homo[:, :, [2, 2]]) # b x n x 2
325
+
326
+ if warp_factor > 0:
327
+ # normalize coordinates to [-1, +1]
328
+ coords = coords / w_h * 2 - 1
329
+
330
+ nl_part1 = coords > 1.0 - warp_factor
331
+ nl_part2 = coords < -1.0 + warp_factor
332
+
333
+ ret_nl_part1 = torch.tanh(
334
+ (coords - 1.0 + warp_factor) /
335
+ warp_factor) * warp_factor + \
336
+ 1.0 - warp_factor
337
+ ret_nl_part2 = torch.tanh(
338
+ (coords + 1.0 - warp_factor) /
339
+ warp_factor) * warp_factor - \
340
+ 1.0 + warp_factor
341
+
342
+ coords = torch.where(nl_part1, ret_nl_part1,
343
+ torch.where(nl_part2, ret_nl_part2, coords))
344
+
345
+ # denormalize
346
+ coords = (coords + 1) / 2 * w_h
347
+
348
+ return coords
349
+
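+ # Consistency sketch (illustrative): `tanh_warp_transform` and `inverted_tanh_warp_transform`
+ # invert each other for the same matrix, warp factor and warped shape.
+ def _example_tanh_warp_round_trip():
+     matrix = torch.eye(3).unsqueeze(0)        # identity alignment, b = 1
+     coords = torch.rand(1, 16, 2) * 400.0     # original (x, y) pixel coordinates
+     warped = tanh_warp_transform(coords, matrix, warp_factor=0.8, warped_shape=(448, 448))
+     restored = inverted_tanh_warp_transform(warped, matrix, warp_factor=0.8,
+                                             warped_shape=(448, 448))
+     assert torch.allclose(restored, coords, atol=1e-2)
+     return warped
+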
350
+
351
+ def make_tanh_warp_grid(matrix: torch.Tensor, warp_factor: float,
352
+ warped_shape: Tuple[int, int],
353
+ orig_shape: Tuple[int, int]):
354
+ """
355
+ Args:
356
+ matrix: bx3x3 matrix.
357
+ warp_factor: The warping factor. `warp_factor=1.0` represents a vanilla Tanh-warping,
358
+ `warp_factor=0.0` represents a cropping.
359
+ warped_shape: The target image shape to transform to.
360
+
361
+ Returns:
362
+ torch.Tensor: b x h x w x 2 (x, y).
363
+ """
364
+ orig_h, orig_w, *_ = orig_shape
365
+ w_h = torch.tensor([orig_w, orig_h]).to(matrix).reshape(1, 1, 1, 2)
366
+ return _forge_grid(
367
+ matrix.size(0), matrix.device,
368
+ warped_shape,
369
+ functools.partial(inverted_tanh_warp_transform,
370
+ matrix=matrix,
371
+ warp_factor=warp_factor,
372
+ warped_shape=warped_shape)) / w_h*2-1
373
+
374
+
375
+ def make_inverted_tanh_warp_grid(matrix: torch.Tensor, warp_factor: float,
376
+ warped_shape: Tuple[int, int],
377
+ orig_shape: Tuple[int, int]):
378
+ """
379
+ Args:
380
+ matrix: bx3x3 matrix.
381
+ warp_factor: The warping factor. `warp_factor=1.0` represents a vanilla Tanh-warping,
382
+ `warp_factor=0.0` represents a cropping.
383
+ warped_shape: The target image shape to transform to.
384
+ orig_shape: The original image shape that is transformed from.
385
+
386
+ Returns:
387
+ torch.Tensor: b x h x w x 2 (x, y).
388
+ """
389
+ h, w, *_ = warped_shape
390
+ w_h = torch.tensor([w, h]).to(matrix).reshape(1, 1, 1, 2)
391
+ return _forge_grid(
392
+ matrix.size(0), matrix.device,
393
+ orig_shape,
394
+ functools.partial(tanh_warp_transform,
395
+ matrix=matrix,
396
+ warp_factor=warp_factor,
397
+ warped_shape=warped_shape)) / w_h * 2-1
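+
+ # End-to-end warping sketch (illustrative; `images` and `landmarks` are placeholders for
+ # your own data): align a face with a similarity matrix, then tanh-warp it into a
+ # 448 x 448 crop, similar to how the FaRL face parser consumes these helpers.
+ def _example_warp_face_crop(images: torch.Tensor, landmarks: torch.Tensor) -> torch.Tensor:
+     # images: b x 3 x H x W float tensor, landmarks: b x 5 x 2 in pixel coordinates
+     _, _, H, W = images.shape
+     matrix = get_face_align_matrix(landmarks, target_shape=(448, 448), target_face_scale=1.0)
+     grid = make_tanh_warp_grid(matrix, warp_factor=0.8, warped_shape=(448, 448),
+                                orig_shape=(H, W))
+     return F.grid_sample(images, grid, mode='bilinear', align_corners=False)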
src/pixel3dmm/preprocessing/replacement_code/farl.py ADDED
@@ -0,0 +1,94 @@
1
+ from typing import Optional, Dict, Any
2
+ import functools
3
+ import torch
4
+ import torch.nn.functional as F
5
+
6
+ from ..util import download_jit
7
+ from ..transform import (get_crop_and_resize_matrix, get_face_align_matrix, get_face_align_matrix_celebm,
8
+ make_inverted_tanh_warp_grid, make_tanh_warp_grid)
9
+ from .base import FaceParser
10
+
11
+ pretrain_settings = {
12
+ 'lapa/448': {
13
+ 'url': [
14
+ 'https://github.com/FacePerceiver/facer/releases/download/models-v1/face_parsing.farl.lapa.main_ema_136500_jit191.pt',
15
+ ],
16
+ 'matrix_src_tag': 'points',
17
+ 'get_matrix_fn': functools.partial(get_face_align_matrix,
18
+ target_shape=(448, 448), target_face_scale=1.0),
19
+ 'get_grid_fn': functools.partial(make_tanh_warp_grid,
20
+ warp_factor=0.8, warped_shape=(448, 448)),
21
+ 'get_inv_grid_fn': functools.partial(make_inverted_tanh_warp_grid,
22
+ warp_factor=0.8, warped_shape=(448, 448)),
23
+ 'label_names': ['background', 'face', 'rb', 'lb', 're',
24
+ 'le', 'nose', 'ulip', 'imouth', 'llip', 'hair']
25
+ },
26
+ 'celebm/448': {
27
+ 'url': [
28
+ 'https://github.com/FacePerceiver/facer/releases/download/models-v1/face_parsing.farl.celebm.main_ema_181500_jit.pt',
29
+ ],
30
+ 'matrix_src_tag': 'points',
31
+ 'get_matrix_fn': functools.partial(get_face_align_matrix_celebm,
32
+ target_shape=(448, 448)),
33
+ 'get_grid_fn': functools.partial(make_tanh_warp_grid,
34
+ warp_factor=0, warped_shape=(448, 448)),
35
+ 'get_inv_grid_fn': functools.partial(make_inverted_tanh_warp_grid,
36
+ warp_factor=0, warped_shape=(448, 448)),
37
+ 'label_names': [
38
+ 'background', 'neck', 'face', 'cloth', 'rr', 'lr', 'rb', 'lb', 're',
39
+ 'le', 'nose', 'imouth', 'llip', 'ulip', 'hair',
40
+ 'eyeg', 'hat', 'earr', 'neck_l']
41
+ }
42
+ }
43
+
44
+
45
+ class FaRLFaceParser(FaceParser):
46
+ """ The face parsing models from [FaRL](https://github.com/FacePerceiver/FaRL).
47
+
48
+ Please consider citing
49
+ ```bibtex
50
+ @article{zheng2021farl,
51
+ title={General Facial Representation Learning in a Visual-Linguistic Manner},
52
+ author={Zheng, Yinglin and Yang, Hao and Zhang, Ting and Bao, Jianmin and Chen,
53
+ Dongdong and Huang, Yangyu and Yuan, Lu and Chen,
54
+ Dong and Zeng, Ming and Wen, Fang},
55
+ journal={arXiv preprint arXiv:2112.03109},
56
+ year={2021}
57
+ }
58
+ ```
59
+ """
60
+
61
+ def __init__(self, conf_name: Optional[str] = None,
62
+ model_path: Optional[str] = None, device=None) -> None:
63
+ super().__init__()
64
+ if conf_name is None:
65
+ conf_name = 'lapa/448'
66
+ if model_path is None:
67
+ model_path = pretrain_settings[conf_name]['url']
68
+ self.conf_name = conf_name
69
+ self.net = download_jit(model_path, map_location=device)
70
+ self.eval()
71
+
72
+ def forward(self, images: torch.Tensor, data: Dict[str, Any], bbox_scale_factor : float = 1.0):
73
+ setting = pretrain_settings[self.conf_name]
74
+ images = images.float() / 255.0
75
+ _, _, h, w = images.shape
76
+
77
+ simages = images[data['image_ids']]
78
+ matrix_fun = functools.partial(get_face_align_matrix_celebm,
79
+ target_shape=(448, 448), bbox_scale_factor=bbox_scale_factor)
80
+ matrix = matrix_fun(data[setting['matrix_src_tag']])
81
+ grid = setting['get_grid_fn'](matrix=matrix, orig_shape=(h, w))
82
+ inv_grid = setting['get_inv_grid_fn'](matrix=matrix, orig_shape=(h, w))
83
+
84
+ w_images = F.grid_sample(
85
+ simages, grid, mode='bilinear', align_corners=False)
86
+
87
+ w_seg_logits, _ = self.net(w_images) # (b*n) x c x h x w
88
+
89
+ seg_logits = F.grid_sample(w_seg_logits, inv_grid, mode='bilinear', align_corners=False)
90
+
91
+
92
+ data['seg'] = {'logits': seg_logits,
93
+ 'label_names': setting['label_names']}
94
+ return data
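+
+ # Minimal usage sketch (illustrative; `images` is a b x 3 x h x w uint8 tensor and `data`
+ # holds 'image_ids' (one image index per detected face) and 'points' (5-point landmarks
+ # per face), as produced by an upstream face detector):
+ def _example_farl_parsing(images, data, device='cuda'):
+     parser = FaRLFaceParser(conf_name='celebm/448', device=device)
+     with torch.no_grad():
+         data = parser(images.to(device), data, bbox_scale_factor=1.0)
+     seg_probs = data['seg']['logits'].softmax(dim=1)  # per-pixel class probabilities
+     return seg_probs, data['seg']['label_names']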
src/pixel3dmm/preprocessing/replacement_code/mica.py ADDED
@@ -0,0 +1,120 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4
+ # holder of all proprietary rights on this computer program.
5
+ # You can only use this computer program if you have closed
6
+ # a license agreement with MPG or you get the right to use the computer
7
+ # program from someone who is authorized to grant you that right.
8
+ # Any use of the computer program without a valid license is prohibited and
9
+ # liable to prosecution.
10
+ #
11
+ # Copyright©2023 Max-Planck-Gesellschaft zur Förderung
12
+ # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13
+ # for Intelligent Systems. All rights reserved.
14
+ #
15
+ # Contact: [email protected]
16
+
17
+
18
+ import os
19
+ import sys
20
+
21
+ sys.path.append("./nfclib")
22
+
23
+ import torch
24
+ import torch.nn.functional as F
25
+
26
+ from models.arcface import Arcface
27
+ from models.generator import Generator
28
+ from micalib.base_model import BaseModel
29
+
30
+ from loguru import logger
31
+
32
+
33
+ class MICA(BaseModel):
34
+ def __init__(self, config=None, device=None, tag='MICA'):
35
+ super(MICA, self).__init__(config, device, tag)
36
+
37
+ self.initialize()
38
+
39
+ def create_model(self, model_cfg):
40
+ mapping_layers = model_cfg.mapping_layers
41
+ pretrained_path = None
42
+ if not model_cfg.use_pretrained:
43
+ pretrained_path = model_cfg.arcface_pretrained_model
44
+ self.arcface = Arcface(pretrained_path=pretrained_path).to(self.device)
45
+ self.flameModel = Generator(512, 300, self.cfg.model.n_shape, mapping_layers, model_cfg, self.device)
46
+
47
+ def load_model(self):
48
+ model_path = os.path.join(self.cfg.output_dir, 'model.tar')
49
+ if os.path.exists(self.cfg.pretrained_model_path) and self.cfg.model.use_pretrained:
50
+ model_path = self.cfg.pretrained_model_path
51
+ if os.path.exists(model_path):
52
+ logger.info(f'[{self.tag}] Trained model found. Path: {model_path} | GPU: {self.device}')
53
+ checkpoint = torch.load(model_path, weights_only=False)
54
+ if 'arcface' in checkpoint:
55
+ self.arcface.load_state_dict(checkpoint['arcface'])
56
+ if 'flameModel' in checkpoint:
57
+ self.flameModel.load_state_dict(checkpoint['flameModel'])
58
+ else:
59
+ logger.info(f'[{self.tag}] Checkpoint not available, starting from scratch!')
60
+
61
+ def model_dict(self):
62
+ return {
63
+ 'flameModel': self.flameModel.state_dict(),
64
+ 'arcface': self.arcface.state_dict()
65
+ }
66
+
67
+ def parameters_to_optimize(self):
68
+ return [
69
+ {'params': self.flameModel.parameters(), 'lr': self.cfg.train.lr},
70
+ {'params': self.arcface.parameters(), 'lr': self.cfg.train.arcface_lr},
71
+ ]
72
+
73
+ def encode(self, images, arcface_imgs):
74
+ codedict = {}
75
+
76
+ codedict['arcface'] = F.normalize(self.arcface(arcface_imgs))
77
+ codedict['images'] = images
78
+
79
+ return codedict
80
+
81
+ def decode(self, codedict, epoch=0):
82
+ self.epoch = epoch
83
+
84
+ flame_verts_shape = None
85
+ shapecode = None
86
+
87
+ if not self.testing:
88
+ flame = codedict['flame']
89
+ shapecode = flame['shape_params'].view(-1, flame['shape_params'].shape[2])
90
+ shapecode = shapecode.to(self.device)[:, :self.cfg.model.n_shape]
91
+ with torch.no_grad():
92
+ flame_verts_shape, _, _ = self.flame(shape_params=shapecode)
93
+
94
+ identity_code = codedict['arcface']
95
+ pred_canonical_vertices, pred_shape_code = self.flameModel(identity_code)
96
+
97
+ output = {
98
+ 'flame_verts_shape': flame_verts_shape,
99
+ 'flame_shape_code': shapecode,
100
+ 'pred_canonical_shape_vertices': pred_canonical_vertices,
101
+ 'pred_shape_code': pred_shape_code,
102
+ 'faceid': codedict['arcface']
103
+ }
104
+
105
+ return output
106
+
107
+ def compute_losses(self, input, encoder_output, decoder_output):
108
+ losses = {}
109
+
110
+ pred_verts = decoder_output['pred_canonical_shape_vertices']
111
+ gt_verts = decoder_output['flame_verts_shape'].detach()
112
+
113
+ pred_verts_shape_canonical_diff = (pred_verts - gt_verts).abs()
114
+
115
+ if self.use_mask:
116
+ pred_verts_shape_canonical_diff *= self.vertices_mask
117
+
118
+ losses['pred_verts_shape_canonical_diff'] = torch.mean(pred_verts_shape_canonical_diff) * 1000.0
119
+
120
+ return losses
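+
+ # Minimal inference sketch (illustrative; assumes `mica` is an already-constructed MICA
+ # instance with cfg.model.testing = True and a loaded checkpoint, and that `images` /
+ # `arcface` are preprocessed tensors as produced by the demo pipeline):
+ def _example_mica_inference(mica, images, arcface):
+     with torch.no_grad():
+         codedict = mica.encode(images, arcface)   # ArcFace identity embedding + input images
+         opdict = mica.decode(codedict)            # canonical FLAME vertices + shape code
+     return opdict['pred_canonical_shape_vertices'], opdict['pred_shape_code']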
src/pixel3dmm/preprocessing/replacement_code/mica_demo.py ADDED
@@ -0,0 +1,188 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4
+ # holder of all proprietary rights on this computer program.
5
+ # You can only use this computer program if you have closed
6
+ # a license agreement with MPG or you get the right to use the computer
7
+ # program from someone who is authorized to grant you that right.
8
+ # Any use of the computer program without a valid license is prohibited and
9
+ # liable to prosecution.
10
+ #
11
+ # Copyright©2023 Max-Planck-Gesellschaft zur Förderung
12
+ # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13
+ # for Intelligent Systems. All rights reserved.
14
+ #
15
+ # Contact: [email protected]
16
+
17
+
18
+ import argparse
19
+ import os
20
+ import random
21
+ import traceback
22
+ from glob import glob
23
+ from pathlib import Path
24
+ from PIL import Image
25
+
26
+ import cv2
27
+ import numpy as np
28
+ import torch
29
+ import torch.backends.cudnn as cudnn
30
+ import trimesh
31
+ from insightface.app.common import Face
32
+ from insightface.utils import face_align
33
+ from loguru import logger
34
+ from skimage.io import imread
35
+ from tqdm import tqdm
36
+ #from retinaface.pre_trained_models import get_model
37
+ #from retinaface.utils import vis_annotations
38
+ #from matplotlib import pyplot as plt
39
+
40
+
41
+ from pixel3dmm.preprocessing.MICA.configs.config import get_cfg_defaults
42
+ from pixel3dmm.preprocessing.MICA.datasets.creation.util import get_arcface_input, get_center, draw_on
43
+ from pixel3dmm.preprocessing.MICA.utils import util
44
+ from pixel3dmm.preprocessing.MICA.utils.landmark_detector import LandmarksDetector, detectors
45
+ from pixel3dmm import env_paths
46
+
47
+
48
+ #model = get_model("resnet50_2020-07-20", max_size=512)
49
+ #model.eval()
50
+
51
+
52
+ def deterministic(rank):
53
+ torch.manual_seed(rank)
54
+ torch.cuda.manual_seed(rank)
55
+ np.random.seed(rank)
56
+ random.seed(rank)
57
+
58
+ cudnn.deterministic = True
59
+ cudnn.benchmark = False
60
+
61
+
62
+ def process(args, app, image_size=224, draw_bbox=False):
63
+ dst = Path(args.a)
64
+ dst.mkdir(parents=True, exist_ok=True)
65
+ processes = []
66
+ image_paths = sorted(glob(args.i + '/*.*'))#[:1]
67
+ image_paths = image_paths[::max(1, len(image_paths)//10)]
68
+ for image_path in tqdm(image_paths):
69
+ name = Path(image_path).stem
70
+ img = cv2.imread(image_path)
71
+
72
+
73
+ # FOR pytorch retinaface use this: img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
74
+ # I had issues with onnxruntime!
75
+ bboxes, kpss = app.detect(img)
76
+
77
+ #annotation = model.predict_jsons(img)
78
+ #Image.fromarray(vis_annotations(img, annotation)).show()
79
+
80
+ #bboxes = np.stack([np.array( annotation[0]['bbox'] + [annotation[0]['score']] ) for i in range(len(annotation))], axis=0)
81
+ #kpss = np.stack([np.array( annotation[0]['landmarks'] ) for i in range(len(annotation))], axis=0)
82
+ if bboxes.shape[0] == 0:
83
+ logger.error(f'[ERROR] Face not detected for {image_path}')
84
+ continue
85
+ i = get_center(bboxes, img)
86
+ bbox = bboxes[i, 0:4]
87
+ det_score = bboxes[i, 4]
88
+ kps = None
89
+ if kpss is not None:
90
+ kps = kpss[i]
91
+
92
+ ##for ikp in range(kps.shape[0]):
93
+ # img[int(kps[ikp][1]), int(kps[ikp][0]), 0] = 255
94
+ # img[int(kpss_[0][ikp][1]), int(kpss_[0][ikp][0]), 1] = 255
95
+ #Image.fromarray(img).show()
96
+ face = Face(bbox=bbox, kps=kps, det_score=det_score)
97
+ blob, aimg = get_arcface_input(face, img)
98
+ file = str(Path(dst, name))
99
+ np.save(file, blob)
100
+ processes.append(file + '.npy')
101
+ cv2.imwrite(file + '.jpg', face_align.norm_crop(img, landmark=face.kps, image_size=image_size))
102
+ if draw_bbox:
103
+ dimg = draw_on(img, [face])
104
+ cv2.imwrite(file + '_bbox.jpg', dimg)
105
+
106
+ return processes
107
+
108
+
109
+ def to_batch(path):
110
+ src = path.replace('npy', 'jpg')
111
+ if not os.path.exists(src):
112
+ src = path.replace('npy', 'png')
113
+
114
+ image = imread(src)[:, :, :3]
115
+ image = image / 255.
116
+ image = cv2.resize(image, (224, 224)).transpose(2, 0, 1)
117
+ image = torch.tensor(image).cuda()[None]
118
+
119
+ arcface = np.load(path)
120
+ arcface = torch.tensor(arcface).cuda()[None]
121
+
122
+ return image, arcface
123
+
124
+
125
+ def load_checkpoint(args, mica):
126
+ checkpoint = torch.load(args.m, weights_only=False)
127
+ if 'arcface' in checkpoint:
128
+ mica.arcface.load_state_dict(checkpoint['arcface'])
129
+ if 'flameModel' in checkpoint:
130
+ mica.flameModel.load_state_dict(checkpoint['flameModel'])
131
+
132
+
133
+ def main(cfg, args):
134
+ device = 'cuda:0'
135
+ cfg.model.testing = True
136
+ mica = util.find_model_using_name(model_dir='micalib.models', model_name=cfg.model.name)(cfg, device)
137
+ load_checkpoint(args, mica)
138
+ mica.eval()
139
+
140
+ faces = mica.flameModel.generator.faces_tensor.cpu()
141
+ Path(args.o).mkdir(exist_ok=True, parents=True)
142
+
143
+ app = LandmarksDetector(model=detectors.RETINAFACE)
144
+
145
+ with torch.no_grad():
146
+ logger.info(f'Processing has started...')
147
+ paths = process(args, app, draw_bbox=False)
148
+ for path in tqdm(paths):
149
+ name = Path(path).stem
150
+ images, arcface = to_batch(path)
151
+ codedict = mica.encode(images, arcface)
152
+ opdict = mica.decode(codedict)
153
+ meshes = opdict['pred_canonical_shape_vertices']
154
+ code = opdict['pred_shape_code']
155
+ lmk = mica.flameModel.generator.compute_landmarks(meshes)
156
+
157
+ mesh = meshes[0]
158
+ landmark_51 = lmk[0, 17:]
159
+ landmark_7 = landmark_51[[19, 22, 25, 28, 16, 31, 37]]
160
+
161
+ dst = Path(args.o, name)
162
+ dst.mkdir(parents=True, exist_ok=True)
163
+ trimesh.Trimesh(vertices=mesh.cpu() * 1000.0, faces=faces, process=False).export(f'{dst}/mesh.ply') # save in millimeters
164
+ trimesh.Trimesh(vertices=mesh.cpu() * 1000.0, faces=faces, process=False).export(f'{dst}/mesh.obj')
165
+ np.save(f'{dst}/identity', code[0].cpu().numpy())
166
+ np.save(f'{dst}/kpt7', landmark_7.cpu().numpy() * 1000.0)
167
+ np.save(f'{dst}/kpt68', lmk.cpu().numpy() * 1000.0)
168
+
169
+ logger.info(f'Processing finished. Results have been saved in {args.o}')
170
+
171
+
172
+ if __name__ == '__main__':
173
+ parser = argparse.ArgumentParser(description='MICA - Towards Metrical Reconstruction of Human Faces')
174
+ parser.add_argument('-video_name', required=True, type=str)
175
+ parser.add_argument('-a', default='demo/arcface', type=str, help='Processed images for MICA input')
176
+ parser.add_argument('-m', default='data/pretrained/mica.tar', type=str, help='Pretrained model path')
177
+
178
+ args = parser.parse_args()
179
+ cfg = get_cfg_defaults()
180
+ args.i = f'{env_paths.PREPROCESSED_DATA}/{args.video_name}/cropped/'
181
+ args.o = f'{env_paths.PREPROCESSED_DATA}/{args.video_name}/mica/'
182
+ if os.path.exists(f'{env_paths.PREPROCESSED_DATA}/{args.video_name}/mica/'):
183
+ if len(os.listdir(f'{env_paths.PREPROCESSED_DATA}/{args.video_name}/mica/')) >= 10:
184
+ print(f'''
185
+ <<<<<<<< ALREADY COMPLETE MICA PREDICTION FOR {args.video_name}, SKIPPING >>>>>>>>
186
+ ''')
187
+ exit()
188
+ main(cfg, args)
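+
+ # Programmatic invocation sketch (illustrative; the video name, checkpoint path and the
+ # 'demo/arcface' staging directory are placeholders for your own setup):
+ def _example_run_mica_demo(video_name='ex1', checkpoint='data/pretrained/mica.tar'):
+     cfg = get_cfg_defaults()
+     args = argparse.Namespace(video_name=video_name, a='demo/arcface', m=checkpoint)
+     args.i = f'{env_paths.PREPROCESSED_DATA}/{video_name}/cropped/'
+     args.o = f'{env_paths.PREPROCESSED_DATA}/{video_name}/mica/'
+     main(cfg, args)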
src/pixel3dmm/preprocessing/replacement_code/pipnet_demo.py ADDED
@@ -0,0 +1,401 @@
1
+ import traceback
2
+
3
+ import cv2, os
4
+ import sys
5
+ sys.path.insert(0, 'FaceBoxesV2')
6
+ sys.path.insert(0, '../..')
7
+ import numpy as np
8
+ import pickle
9
+ import importlib
10
+ from math import floor
11
+ from faceboxes_detector import *
12
+ import time
13
+
14
+ import torch
15
+ import torch.nn as nn
16
+ import torch.nn.parallel
17
+ import torch.optim as optim
18
+ import torch.utils.data
19
+ import torch.nn.functional as F
20
+ import torchvision.transforms as transforms
21
+ import torchvision.datasets as datasets
22
+ import torchvision.models as models
23
+
24
+ from networks import *
25
+ import data_utils
26
+ from functions import *
27
+ from mobilenetv3 import mobilenetv3_large
28
+
29
+
30
+ def smooth(x, window_len=11, window='hanning'):
31
+ """smooth the data using a window with requested size.
32
+
33
+ This method is based on the convolution of a scaled window with the signal.
34
+ The signal is prepared by introducing reflected copies of the signal
35
+ (with the window size) in both ends so that transient parts are minimized
36
+ in the beginning and end parts of the output signal.
37
+
38
+ input:
39
+ x: the input signal
40
+ window_len: the dimension of the smoothing window; should be an odd integer
41
+ window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
42
+ flat window will produce a moving average smoothing.
43
+
44
+ output:
45
+ the smoothed signal
46
+
47
+ example:
48
+
49
+ t = np.linspace(-2, 2, 50)
51
+ x = np.sin(t) + np.random.randn(len(t)) * 0.1
52
+ y = smooth(x)
52
+
53
+ see also:
54
+
55
+ numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
56
+ scipy.signal.lfilter
57
+
58
+ TODO: the window parameter could be the window itself if an array instead of a string
59
+ NOTE: length(output) != length(input), to correct this: return y[(window_len/2-1):-(window_len/2)] instead of just y.
60
+ """
61
+
62
+ if x.ndim != 1:
63
+ raise ValueError("smooth only accepts 1 dimension arrays.")
64
+
65
+ if x.size < window_len:
66
+ raise ValueError( "Input vector needs to be bigger than window size.")
67
+
68
+ if window_len < 3:
69
+ return x
70
+
71
+ if window not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
72
+ raise ValueError("Window must be one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
73
+
74
+ s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
75
+ # print(len(s))
76
+ if window == 'flat': # moving average
77
+ w = np.ones(window_len, 'd')
78
+ else:
79
+ w = getattr(np, window)(window_len)  # e.g. np.hanning(window_len)
80
+
81
+ y = np.convolve(w / w.sum(), s, mode='valid')
82
+ return y
83
+
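+ # Quick usage sketch for `smooth` (illustrative; mirrors the docstring example):
+ def _example_smooth():
+     t = np.linspace(-2, 2, 200)
+     noisy = np.sin(t) + np.random.randn(len(t)) * 0.1
+     smoothed = smooth(noisy, window_len=11, window='hanning')
+     return smoothed  # note: len(smoothed) == len(noisy) + window_len - 1
+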
84
+ if not len(sys.argv) == 3:
85
+ print('Format:')
86
+ print('python lib/demo.py config_file image_file')
87
+ exit(0)
88
+
89
+
90
+ experiment_name = sys.argv[1].split('/')[-1][:-3]
91
+ data_name = sys.argv[1].split('/')[-2]
92
+ config_path = '.experiments.{}.{}'.format(data_name, experiment_name)
93
+
94
+ def get_cstm_crop(image, detections):
95
+ #Image.fromarray(image).show()
96
+ image_width = image.shape[1]
97
+ image_height = image.shape[0]
98
+ det_box_scale = 1.42 #2.0#1.42
99
+ det_xmin = detections[2]
100
+ det_ymin = detections[3]
101
+ det_width = detections[4]
102
+ det_height = detections[5]
103
+ if det_width > det_height:
104
+ det_ymin -= (det_width - det_height)//2
105
+ det_height = det_width
106
+ if det_width < det_height:
107
+ det_xmin -= (det_height - det_width)//2
108
+ det_width = det_height
109
+
110
+ det_xmax = det_xmin + det_width - 1
111
+ det_ymax = det_ymin + det_height - 1
112
+
113
+
114
+ det_xmin -= int(det_width * (det_box_scale - 1) / 2)
115
+ det_ymin -= int(det_height * (det_box_scale - 1) / 2)
116
+ det_xmax += int(det_width * (det_box_scale - 1) / 2)
117
+ det_ymax += int(det_height * (det_box_scale - 1) / 2)
118
+ if det_xmin < 0 or det_ymin < 0:
119
+ min_overflow = min(det_xmin, det_ymin)
120
+ det_xmin += -min_overflow
121
+ det_ymin += -min_overflow
122
+ if det_xmax > image_width -1 or det_ymax > image_height - 1:
123
+ max_overflow = max(det_xmax - (image_width - 1), det_ymax - (image_height - 1))
124
+ det_xmax -= max_overflow
125
+ det_ymax -= max_overflow
126
+
127
+ det_width = det_xmax - det_xmin + 1
128
+ det_height = det_ymax - det_ymin + 1
129
+ det_crop = image[det_ymin:det_ymax, det_xmin:det_xmax, :]
130
+ return det_crop
131
+ #Image.fromarray(det_crop).show()
132
+ #exit()
133
+
134
+ def demo_image(image_dir, pid, cam_dir, net, preprocess, cfg, input_size, net_stride, num_nb, use_gpu, device, flip=False, start_frame=0,
135
+ vertical_crop : bool = False,
136
+ static_crop : bool = False,
137
+ ):
138
+ detector = FaceBoxesDetector('FaceBoxes', '../PIPNet/FaceBoxesV2/weights/FaceBoxesV2.pth', use_gpu, device)
139
+ my_thresh = 0.6
140
+ det_box_scale = 1.2
141
+ meanface_indices, reverse_index1, reverse_index2, max_len = get_meanface(
142
+ os.path.join('../..', 'PIPNet', 'data', cfg.data_name, 'meanface.txt'), cfg.num_nb)
143
+
144
+ net.eval()
145
+
146
+ #CROP_DIR = '/mnt/rohan/cluster/angmar/sgiebenhain/now_crops_pipnet/'
147
+ #os.makedirs(CROP_DIR, exist_ok=True)
148
+
149
+
150
+ if start_frame > 0:
151
+ files = [f for f in os.listdir(f'{image_dir}/') if (f.endswith('.jpg') or f.endswith('.png')) and (((int(f.split('_')[-1].split('.')[0]) - start_frame) % 3) == 0)]
152
+ else:
153
+ files = [f for f in os.listdir(f'{image_dir}/') if f.endswith('.jpg') or f.endswith('.png')]
154
+ files.sort()
155
+
156
+ if not vertical_crop:
157
+ all_detections = []
158
+ all_images = []
159
+ #all_normals = []
160
+ succ_files = []
161
+ for file_name in files:
162
+ image = cv2.imread(f'{image_dir}/{file_name}')
163
+ #normals = cv2.imread(f'{image_dir}/../normals/{file_name[:-4]}.png')
164
+
165
+ if len(image.shape) < 3 or image.shape[-1] != 3:
166
+ continue
167
+
168
+ image_height, image_width, _ = image.shape
169
+ detections, _ = detector.detect(image, my_thresh, 1)
170
+ dets_filtered = [det for det in detections if det[0] == 'face']
171
+ dets_filtered.sort(key=lambda x: -1 * x[1])
172
+ detections = dets_filtered
173
+ if len(detections) == 0 or detections[0][1] < 0.75:
174
+ raise ValueError("No face detected with sufficiently high confidence")
175
+ all_detections.append(detections[0])
176
+ all_images.append(image)
177
+ #all_normals.append(normals)
178
+ succ_files.append(file_name)
179
+
180
+ if static_crop:
181
+ det1 = np.mean(np.array([x[2] for x in all_detections]), axis=0)
182
+ det2 = np.mean(np.array([x[3] for x in all_detections]), axis=0)
183
+ det3 = np.mean(np.array([x[4] for x in all_detections]), axis=0)
184
+ det4 = np.mean(np.array([x[5] for x in all_detections]), axis=0)
185
+ det_smoothed = np.stack([det1, det2, det3, det4], axis=0).astype(np.int32)
186
+ all_detections_smoothed = [] # = [[x[0], x[1], x_smoothed[0], x_smoothed[1], x_smoothed[2], x_smoothed[3]] for x, x_smoothed in zip()]
187
+ for i, det in enumerate(all_detections):
188
+ all_detections_smoothed.append(
189
+ [det[0], det[1], det_smoothed[0], det_smoothed[1], det_smoothed[2], det_smoothed[3]])
190
+ all_detections = all_detections_smoothed
191
+ else:
192
+ if len(all_detections) > 11:
193
+ WINDOW_LENGTH = 11
194
+ det1 = smooth(np.array([x[2] for x in all_detections]), window_len=WINDOW_LENGTH)
195
+ det2 = smooth(np.array([x[3] for x in all_detections]), window_len=WINDOW_LENGTH)
196
+ det3 = smooth(np.array([x[4] for x in all_detections]), window_len=WINDOW_LENGTH)
197
+ det4 = smooth(np.array([x[5] for x in all_detections]), window_len=WINDOW_LENGTH)
198
+ det_smoothed = np.stack([det1, det2,det3,det4], axis=1).astype(np.int32)
199
+ all_detections_smoothed = [] #= [[x[0], x[1], x_smoothed[0], x_smoothed[1], x_smoothed[2], x_smoothed[3]] for x, x_smoothed in zip()]
200
+ for i, det in enumerate(all_detections):
201
+ all_detections_smoothed.append([det[0], det[1], det_smoothed[i, 0], det_smoothed[i, 1], det_smoothed[i, 2], det_smoothed[i, 3]])
202
+ all_detections = all_detections_smoothed
203
+ # TODO: smooth detections!!!
204
+ for file_name, detection, image in zip(succ_files, all_detections, all_images):
205
+
206
+ img_crop = get_cstm_crop(image, detection)
207
+ #n_crop = get_cstm_crop(normals, detection)
208
+ image = img_crop
209
+ # save cropped image
210
+ os.makedirs(f'{image_dir}/../cropped/', exist_ok=True)
211
+ #os.makedirs(f'{image_dir}/../cropped_normals/', exist_ok=True)
212
+ cv2.imwrite(f'{image_dir}/../cropped/{file_name}', cv2.resize(image, (512, 512)))
213
+ #cv2.imwrite(f'{image_dir}/../cropped_normals/{file_name[:-4]}.png', cv2.resize(n_crop, (512, 512)))
214
+ else:
215
+ for file_name in files:
216
+ image = cv2.imread(f'{image_dir}/{file_name}')
217
+ if image.shape[0] != image.shape[1]:
218
+ image = image[220:-220, 640:-640, :]
219
+ os.makedirs(f'{image_dir}/../cropped/', exist_ok=True)
220
+ cv2.imwrite(f'{image_dir}/../cropped/{file_name}', cv2.resize(image, (512, 512)))
221
+
222
+
223
+ lms = []
224
+ image_dir = f'{image_dir}/../cropped/'
225
+ for file_name in files:
226
+ image = cv2.imread(f'{image_dir}/{file_name}')
227
+
228
+ if len(image.shape) < 3 or image.shape[-1] != 3:
229
+ continue
230
+ if flip:
231
+ image = cv2.transpose(image)
232
+
233
+ image_height, image_width, _ = image.shape
234
+ detections, _ = detector.detect(image, my_thresh, 1)
235
+ pred_export = None
236
+ dets_filtered = [det for det in detections if det[0] == 'face']
237
+ dets_filtered.sort(key=lambda x: -1 * x[1])
238
+ detections = dets_filtered
239
+
240
+
241
+ print(detections)
242
+ for i in range(min(1, len(detections))):
243
+ if detections[i][1] < 0.99:
244
+ continue
245
+ det_xmin = detections[i][2]
246
+ det_ymin = detections[i][3]
247
+ det_width = detections[i][4]
248
+ det_height = detections[i][5]
249
+ det_xmax = det_xmin + det_width - 1
250
+ det_ymax = det_ymin + det_height - 1
251
+
252
+
253
+ det_xmin -= int(det_width * (det_box_scale - 1) / 2)
254
+ # remove a part of top area for alignment, see paper for details
255
+ det_ymin += int(det_height * (det_box_scale - 1) / 2)
256
+ det_xmax += int(det_width * (det_box_scale - 1) / 2)
257
+ det_ymax += int(det_height * (det_box_scale - 1) / 2)
258
+ det_xmin = max(det_xmin, 0)
259
+ det_ymin = max(det_ymin, 0)
260
+ det_xmax = min(det_xmax, image_width - 1)
261
+ det_ymax = min(det_ymax, image_height - 1)
262
+ det_width = det_xmax - det_xmin + 1
263
+ det_height = det_ymax - det_ymin + 1
264
+ cv2.rectangle(image, (det_xmin, det_ymin), (det_xmax, det_ymax), (0, 0, 255), 2)
265
+ det_crop = image[det_ymin:det_ymax, det_xmin:det_xmax, :]
266
+ #np.save(f'{CROP_DIR}/{pid[:-4]}.npy', np.array([det_ymin, det_ymax, det_xmin, det_xmax]))
267
+ det_crop = cv2.resize(det_crop, (input_size, input_size))
268
+ inputs = Image.fromarray(det_crop[:, :, ::-1].astype('uint8'), 'RGB')
269
+ #inputs.show()
270
+ inputs = preprocess(inputs).unsqueeze(0)
271
+ inputs = inputs.to(device)
272
+ lms_pred_x, lms_pred_y, lms_pred_nb_x, lms_pred_nb_y, outputs_cls, max_cls = forward_pip(net,
273
+ inputs,
274
+ preprocess,
275
+ input_size,
276
+ net_stride,
277
+ num_nb)
278
+ lms_pred = torch.cat((lms_pred_x, lms_pred_y), dim=1).flatten()
279
+ tmp_nb_x = lms_pred_nb_x[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
280
+ tmp_nb_y = lms_pred_nb_y[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
281
+ tmp_x = torch.mean(torch.cat((lms_pred_x, tmp_nb_x), dim=1), dim=1).view(-1, 1)
282
+ tmp_y = torch.mean(torch.cat((lms_pred_y, tmp_nb_y), dim=1), dim=1).view(-1, 1)
283
+ lms_pred_merge = torch.cat((tmp_x, tmp_y), dim=1).flatten()
284
+ lms_pred = lms_pred.cpu().numpy()
285
+ lms_pred_merge = lms_pred_merge.cpu().numpy()
286
+ pred_export = np.zeros([cfg.num_lms, 2])
287
+ for i in range(cfg.num_lms):
288
+ x_pred = lms_pred_merge[i * 2] * det_width
289
+ y_pred = lms_pred_merge[i * 2 + 1] * det_height
290
+ pred_export[i, 0] = (x_pred + det_xmin) / image_width
291
+ pred_export[i, 1] = (y_pred + det_ymin) / image_height
292
+ cv2.circle(image, (int(x_pred) + det_xmin, int(y_pred) + det_ymin), 1, (0, 0, 255), 2)
293
+ if i == 76:
294
+ cv2.circle(image, (int(x_pred) + det_xmin, int(y_pred) + det_ymin), 1, (255, 0, 0), 2)
295
+
296
+ if pred_export is not None:
297
+ print('Exporting landmarks to ' + image_dir)
298
+ landmark_dir = f'{image_dir}/../PIPnet_landmarks/'
299
+ os.makedirs(landmark_dir, exist_ok=True)
300
+ np.save(landmark_dir + f'/{file_name[:-4]}.npy', pred_export)
301
+ lms.append(pred_export)
302
+ exp_dir = image_dir + '/../PIPnet_annotated_images/'
303
+ os.makedirs(exp_dir, exist_ok=True)
304
+ cv2.imwrite(exp_dir + f'/{file_name}', image)
305
+
306
+ # cv2.imshow('1', image)
307
+ # cv2.waitKey(0)
308
+
309
+ lms = np.stack(lms, axis=0)
310
+ os.makedirs(f'{image_dir}/../pipnet', exist_ok=True)
311
+ np.save(f'{image_dir}/../pipnet/test.npy', lms)
312
+
313
+
314
+ def run(exp_path, image_dir, start_frame = 0,
315
+ vertical_crop : bool = False,
316
+ static_crop : bool = False
317
+ ):
318
+ experiment_name = exp_path.split('/')[-1][:-3]
319
+ data_name = exp_path.split('/')[-2]
320
+ config_path = '.experiments.{}.{}'.format(data_name, experiment_name)
321
+
322
+ my_config = importlib.import_module(config_path, package='PIPNet')
323
+ Config = getattr(my_config, 'Config')
324
+ cfg = Config()
325
+ cfg.experiment_name = experiment_name
326
+ cfg.data_name = data_name
327
+
328
+ save_dir = os.path.join('../PIPNet/snapshots', cfg.data_name, cfg.experiment_name)
329
+
330
+ if cfg.backbone == 'resnet18':
331
+ resnet18 = models.resnet18(pretrained=cfg.pretrained)
332
+ net = Pip_resnet18(resnet18, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size,
333
+ net_stride=cfg.net_stride)
334
+ elif cfg.backbone == 'resnet50':
335
+ resnet50 = models.resnet50(pretrained=cfg.pretrained)
336
+ net = Pip_resnet50(resnet50, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size,
337
+ net_stride=cfg.net_stride)
338
+ elif cfg.backbone == 'resnet101':
339
+ resnet101 = models.resnet101(pretrained=cfg.pretrained)
340
+ net = Pip_resnet101(resnet101, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size,
341
+ net_stride=cfg.net_stride)
342
+ elif cfg.backbone == 'mobilenet_v2':
343
+ mbnet = models.mobilenet_v2(pretrained=cfg.pretrained)
344
+ net = Pip_mbnetv2(mbnet, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size, net_stride=cfg.net_stride)
345
+ elif cfg.backbone == 'mobilenet_v3':
346
+ mbnet = mobilenetv3_large()
347
+ if cfg.pretrained:
348
+ mbnet.load_state_dict(torch.load('lib/mobilenetv3-large-1cd25616.pth'))
349
+ net = Pip_mbnetv3(mbnet, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size, net_stride=cfg.net_stride)
350
+ else:
351
+ print('No such backbone!')
352
+ exit(0)
353
+
354
+ if cfg.use_gpu:
355
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
356
+ else:
357
+ device = torch.device("cpu")
358
+ net = net.to(device)
359
+
360
+ weight_file = os.path.join(save_dir, 'epoch%d.pth' % (cfg.num_epochs - 1))
361
+ state_dict = torch.load(weight_file, map_location=device)
362
+ net.load_state_dict(state_dict)
363
+
364
+ normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
365
+ std=[0.229, 0.224, 0.225])
366
+ preprocess = transforms.Compose(
367
+ [transforms.Resize((cfg.input_size, cfg.input_size)), transforms.ToTensor(), normalize])
368
+
369
+
370
+ #for pid in pids:
371
+ pid = "FaMoS_180424_03335_TA_selfie_IMG_0092.jpg"
372
+ pid = "FaMoS_180426_03336_TA_selfie_IMG_0152.jpg"
373
+
374
+
375
+
376
+ demo_image(image_dir, pid, None, net, preprocess, cfg, cfg.input_size, cfg.net_stride, cfg.num_nb,
377
+ cfg.use_gpu,
378
+ device, start_frame=start_frame, vertical_crop=vertical_crop, static_crop=static_crop)
379
+
380
+
381
+
382
+ if __name__ == '__main__':
383
+ base_path = '/mnt/rohan/cluster/valinor/jschmidt/becominglit/1015/HEADROT/img_cc_4/cam_220700191/'
384
+ base_path = '/home/giebenhain/try_tracking_obama2/rgb'
385
+ #base_base_path = '/home/giebenhain/test_videos_p3dmm_full/'
386
+ base_base_path = '/mnt/rohan/cluster/andram/sgiebenhain/test_video_p3dmm_full/'
387
+ v_names = [f for f in os.listdir(base_base_path) if f.startswith('th1k')]
388
+ print(v_names)
389
+ #v_names = list(range(800, 813))
390
+ #v_names = ['yu', 'marc', 'karla', 'karla_light', 'karla_glasses_hat', 'karla_glasses'] #['merlin', 'haoxuan']
391
+ for video_name in v_names:
392
+ base_path = f'{base_base_path}/{video_name}/rgb/'
393
+ #if os.path.exists(f'{base_path}/../cropped/'):
394
+ # print('SKIP', base_path)
395
+ # continue
396
+ start_frame = -1
397
+ vertical_crop=True
398
+ try:
399
+ run('experiments/WFLW/pip_32_16_60_r18_l2_l1_10_1_nb10.py', base_path, start_frame=start_frame, vertical_crop=False, static_crop=True)
400
+ except Exception as ex:
401
+ traceback.print_exc()
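+
+ # Programmatic invocation sketch (illustrative; the experiment config and the image
+ # directory are placeholders for your own data layout):
+ def _example_run_pipnet(image_dir='/path/to/video/rgb/'):
+     run('experiments/WFLW/pip_32_16_60_r18_l2_l1_10_1_nb10.py', image_dir,
+         start_frame=-1, vertical_crop=False, static_crop=True)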