Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +27 -0
- .gitignore +174 -0
- LICENSE +209 -0
- README.md +299 -7
- app_sadtalker.py +109 -0
- checkpoints/SadTalker_V0.0.2_256.safetensors +3 -0
- checkpoints/SadTalker_V0.0.2_512.safetensors +3 -0
- checkpoints/mapping_00109-model.pth.tar +3 -0
- checkpoints/mapping_00229-model.pth.tar +3 -0
- cog.yaml +35 -0
- docs/FAQ.md +46 -0
- docs/best_practice.md +94 -0
- docs/changlelog.md +29 -0
- docs/example_crop.gif +3 -0
- docs/example_crop_still.gif +3 -0
- docs/example_full.gif +3 -0
- docs/example_full_crop.gif +0 -0
- docs/example_full_enhanced.gif +3 -0
- docs/face3d.md +47 -0
- docs/free_view_result.gif +3 -0
- docs/install.md +39 -0
- docs/resize_good.gif +3 -0
- docs/resize_no.gif +3 -0
- docs/sadtalker_logo.png +0 -0
- docs/using_ref_video.gif +3 -0
- docs/webui_extension.md +49 -0
- examples/driven_audio/RD_Radio31_000.wav +0 -0
- examples/driven_audio/RD_Radio34_002.wav +0 -0
- examples/driven_audio/RD_Radio36_000.wav +0 -0
- examples/driven_audio/RD_Radio40_000.wav +0 -0
- examples/driven_audio/bus_chinese.wav +0 -0
- examples/driven_audio/chinese_news.wav +3 -0
- examples/driven_audio/chinese_poem1.wav +0 -0
- examples/driven_audio/chinese_poem2.wav +0 -0
- examples/driven_audio/deyu.wav +3 -0
- examples/driven_audio/eluosi.wav +3 -0
- examples/driven_audio/fayu.wav +3 -0
- examples/driven_audio/imagine.wav +3 -0
- examples/driven_audio/itosinger1.wav +0 -0
- examples/driven_audio/japanese.wav +3 -0
- examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4 +3 -0
- examples/ref_video/WDA_KatieHill_000.mp4 +3 -0
- examples/source_image/art_0.png +0 -0
- examples/source_image/art_1.png +0 -0
- examples/source_image/art_10.png +0 -0
- examples/source_image/art_11.png +0 -0
- examples/source_image/art_12.png +0 -0
- examples/source_image/art_13.png +0 -0
- examples/source_image/art_14.png +0 -0
- examples/source_image/art_15.png +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,30 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
docs/example_crop.gif filter=lfs diff=lfs merge=lfs -text
|
37 |
+
docs/example_crop_still.gif filter=lfs diff=lfs merge=lfs -text
|
38 |
+
docs/example_full.gif filter=lfs diff=lfs merge=lfs -text
|
39 |
+
docs/example_full_enhanced.gif filter=lfs diff=lfs merge=lfs -text
|
40 |
+
docs/free_view_result.gif filter=lfs diff=lfs merge=lfs -text
|
41 |
+
docs/resize_good.gif filter=lfs diff=lfs merge=lfs -text
|
42 |
+
docs/resize_no.gif filter=lfs diff=lfs merge=lfs -text
|
43 |
+
docs/using_ref_video.gif filter=lfs diff=lfs merge=lfs -text
|
44 |
+
examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
|
45 |
+
examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
|
46 |
+
examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
|
47 |
+
examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
|
48 |
+
examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
|
49 |
+
examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
|
50 |
+
examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4 filter=lfs diff=lfs merge=lfs -text
|
51 |
+
examples/ref_video/WDA_KatieHill_000.mp4 filter=lfs diff=lfs merge=lfs -text
|
52 |
+
examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
|
53 |
+
examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
|
54 |
+
examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
|
55 |
+
examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
|
56 |
+
examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
|
57 |
+
examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
|
58 |
+
examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text
|
59 |
+
results/2a145f10-368f-4c49-ad9c-d81f26620673/WDA_AlexandriaOcasioCortez_000_full.mp4 filter=lfs diff=lfs merge=lfs -text
|
60 |
+
results/3c31eee7-6a33-4251-9f60-0d435d2cedea/WDA_KatieHill_000_enhanced.mp4 filter=lfs diff=lfs merge=lfs -text
|
61 |
+
results/43e6d2d2-a0ad-4c24-bc7b-58c9231d2d59/WDA_KatieHill_000_enhanced.mp4 filter=lfs diff=lfs merge=lfs -text
|
62 |
+
results/ef1b868f-279e-4981-917b-29e4b6559046/WDA_AlexandriaOcasioCortez_000_full.mp4 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
.idea/
|
161 |
+
|
162 |
+
examples/results/*
|
163 |
+
gfpgan/*
|
164 |
+
checkpoints/*
|
165 |
+
assets/*
|
166 |
+
results/*
|
167 |
+
Dockerfile
|
168 |
+
start_docker.sh
|
169 |
+
start.sh
|
170 |
+
|
171 |
+
checkpoints
|
172 |
+
|
173 |
+
# Mac
|
174 |
+
.DS_Store
|
LICENSE
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Tencent is pleased to support the open source community by making SadTalker available.
|
2 |
+
|
3 |
+
Copyright (C), a Tencent company. All rights reserved.
|
4 |
+
|
5 |
+
SadTalker is licensed under the Apache 2.0 License, except for the third-party components listed below.
|
6 |
+
|
7 |
+
Terms of the Apache License Version 2.0:
|
8 |
+
---------------------------------------------
|
9 |
+
Apache License
|
10 |
+
Version 2.0, January 2004
|
11 |
+
http://www.apache.org/licenses/
|
12 |
+
|
13 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
14 |
+
|
15 |
+
1. Definitions.
|
16 |
+
|
17 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
18 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
19 |
+
|
20 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
21 |
+
the copyright owner that is granting the License.
|
22 |
+
|
23 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
24 |
+
other entities that control, are controlled by, or are under common
|
25 |
+
control with that entity. For the purposes of this definition,
|
26 |
+
"control" means (i) the power, direct or indirect, to cause the
|
27 |
+
direction or management of such entity, whether by contract or
|
28 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
29 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
30 |
+
|
31 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
32 |
+
exercising permissions granted by this License.
|
33 |
+
|
34 |
+
"Source" form shall mean the preferred form for making modifications,
|
35 |
+
including but not limited to software source code, documentation
|
36 |
+
source, and configuration files.
|
37 |
+
|
38 |
+
"Object" form shall mean any form resulting from mechanical
|
39 |
+
transformation or translation of a Source form, including but
|
40 |
+
not limited to compiled object code, generated documentation,
|
41 |
+
and conversions to other media types.
|
42 |
+
|
43 |
+
"Work" shall mean the work of authorship, whether in Source or
|
44 |
+
Object form, made available under the License, as indicated by a
|
45 |
+
copyright notice that is included in or attached to the work
|
46 |
+
(an example is provided in the Appendix below).
|
47 |
+
|
48 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
49 |
+
form, that is based on (or derived from) the Work and for which the
|
50 |
+
editorial revisions, annotations, elaborations, or other modifications
|
51 |
+
represent, as a whole, an original work of authorship. For the purposes
|
52 |
+
of this License, Derivative Works shall not include works that remain
|
53 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
54 |
+
the Work and Derivative Works thereof.
|
55 |
+
|
56 |
+
"Contribution" shall mean any work of authorship, including
|
57 |
+
the original version of the Work and any modifications or additions
|
58 |
+
to that Work or Derivative Works thereof, that is intentionally
|
59 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
60 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
61 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
62 |
+
means any form of electronic, verbal, or written communication sent
|
63 |
+
to the Licensor or its representatives, including but not limited to
|
64 |
+
communication on electronic mailing lists, source code control systems,
|
65 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
66 |
+
Licensor for the purpose of discussing and improving the Work, but
|
67 |
+
excluding communication that is conspicuously marked or otherwise
|
68 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
69 |
+
|
70 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
71 |
+
on behalf of whom a Contribution has been received by Licensor and
|
72 |
+
subsequently incorporated within the Work.
|
73 |
+
|
74 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77 |
+
copyright license to reproduce, prepare Derivative Works of,
|
78 |
+
publicly display, publicly perform, sublicense, and distribute the
|
79 |
+
Work and such Derivative Works in Source or Object form.
|
80 |
+
|
81 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
82 |
+
this License, each Contributor hereby grants to You a perpetual,
|
83 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
84 |
+
(except as stated in this section) patent license to make, have made,
|
85 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
86 |
+
where such license applies only to those patent claims licensable
|
87 |
+
by such Contributor that are necessarily infringed by their
|
88 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
89 |
+
with the Work to which such Contribution(s) was submitted. If You
|
90 |
+
institute patent litigation against any entity (including a
|
91 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
92 |
+
or a Contribution incorporated within the Work constitutes direct
|
93 |
+
or contributory patent infringement, then any patent licenses
|
94 |
+
granted to You under this License for that Work shall terminate
|
95 |
+
as of the date such litigation is filed.
|
96 |
+
|
97 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
98 |
+
Work or Derivative Works thereof in any medium, with or without
|
99 |
+
modifications, and in Source or Object form, provided that You
|
100 |
+
meet the following conditions:
|
101 |
+
|
102 |
+
(a) You must give any other recipients of the Work or
|
103 |
+
Derivative Works a copy of this License; and
|
104 |
+
|
105 |
+
(b) You must cause any modified files to carry prominent notices
|
106 |
+
stating that You changed the files; and
|
107 |
+
|
108 |
+
(c) You must retain, in the Source form of any Derivative Works
|
109 |
+
that You distribute, all copyright, patent, trademark, and
|
110 |
+
attribution notices from the Source form of the Work,
|
111 |
+
excluding those notices that do not pertain to any part of
|
112 |
+
the Derivative Works; and
|
113 |
+
|
114 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
115 |
+
distribution, then any Derivative Works that You distribute must
|
116 |
+
include a readable copy of the attribution notices contained
|
117 |
+
within such NOTICE file, excluding those notices that do not
|
118 |
+
pertain to any part of the Derivative Works, in at least one
|
119 |
+
of the following places: within a NOTICE text file distributed
|
120 |
+
as part of the Derivative Works; within the Source form or
|
121 |
+
documentation, if provided along with the Derivative Works; or,
|
122 |
+
within a display generated by the Derivative Works, if and
|
123 |
+
wherever such third-party notices normally appear. The contents
|
124 |
+
of the NOTICE file are for informational purposes only and
|
125 |
+
do not modify the License. You may add Your own attribution
|
126 |
+
notices within Derivative Works that You distribute, alongside
|
127 |
+
or as an addendum to the NOTICE text from the Work, provided
|
128 |
+
that such additional attribution notices cannot be construed
|
129 |
+
as modifying the License.
|
130 |
+
|
131 |
+
You may add Your own copyright statement to Your modifications and
|
132 |
+
may provide additional or different license terms and conditions
|
133 |
+
for use, reproduction, or distribution of Your modifications, or
|
134 |
+
for any such Derivative Works as a whole, provided Your use,
|
135 |
+
reproduction, and distribution of the Work otherwise complies with
|
136 |
+
the conditions stated in this License.
|
137 |
+
|
138 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
139 |
+
any Contribution intentionally submitted for inclusion in the Work
|
140 |
+
by You to the Licensor shall be under the terms and conditions of
|
141 |
+
this License, without any additional terms or conditions.
|
142 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
143 |
+
the terms of any separate license agreement you may have executed
|
144 |
+
with Licensor regarding such Contributions.
|
145 |
+
|
146 |
+
6. Trademarks. This License does not grant permission to use the trade
|
147 |
+
names, trademarks, service marks, or product names of the Licensor,
|
148 |
+
except as required for reasonable and customary use in describing the
|
149 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
150 |
+
|
151 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
152 |
+
agreed to in writing, Licensor provides the Work (and each
|
153 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
154 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
155 |
+
implied, including, without limitation, any warranties or conditions
|
156 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
157 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
158 |
+
appropriateness of using or redistributing the Work and assume any
|
159 |
+
risks associated with Your exercise of permissions under this License.
|
160 |
+
|
161 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
162 |
+
whether in tort (including negligence), contract, or otherwise,
|
163 |
+
unless required by applicable law (such as deliberate and grossly
|
164 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
165 |
+
liable to You for damages, including any direct, indirect, special,
|
166 |
+
incidental, or consequential damages of any character arising as a
|
167 |
+
result of this License or out of the use or inability to use the
|
168 |
+
Work (including but not limited to damages for loss of goodwill,
|
169 |
+
work stoppage, computer failure or malfunction, or any and all
|
170 |
+
other commercial damages or losses), even if such Contributor
|
171 |
+
has been advised of the possibility of such damages.
|
172 |
+
|
173 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
174 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
175 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
176 |
+
or other liability obligations and/or rights consistent with this
|
177 |
+
License. However, in accepting such obligations, You may act only
|
178 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
179 |
+
of any other Contributor, and only if You agree to indemnify,
|
180 |
+
defend, and hold each Contributor harmless for any liability
|
181 |
+
incurred by, or claims asserted against, such Contributor by reason
|
182 |
+
of your accepting any such warranty or additional liability.
|
183 |
+
|
184 |
+
END OF TERMS AND CONDITIONS
|
185 |
+
|
186 |
+
APPENDIX: How to apply the Apache License to your work.
|
187 |
+
|
188 |
+
To apply the Apache License to your work, attach the following
|
189 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
190 |
+
replaced with your own identifying information. (Don't include
|
191 |
+
the brackets!) The text should be enclosed in the appropriate
|
192 |
+
comment syntax for the file format. We also recommend that a
|
193 |
+
file or class name and description of purpose be included on the
|
194 |
+
same "printed page" as the copyright notice for easier
|
195 |
+
identification within third-party archives.
|
196 |
+
|
197 |
+
Copyright [yyyy] [name of copyright owner]
|
198 |
+
|
199 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
200 |
+
you may not use this file except in compliance with the License.
|
201 |
+
You may obtain a copy of the License at
|
202 |
+
|
203 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
204 |
+
|
205 |
+
Unless required by applicable law or agreed to in writing, software
|
206 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
207 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
208 |
+
See the License for the specific language governing permissions and
|
209 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,12 +1,304 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: blue
|
5 |
-
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.47.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
|
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: NamastexLabs_SadTalker
|
3 |
+
app_file: app_sadtalker.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 3.47.1
|
|
|
|
|
6 |
---
|
7 |
+
<div align="center">
|
8 |
|
9 |
+
<img src='https://user-images.githubusercontent.com/4397546/229094115-862c747e-7397-4b54-ba4a-bd368bfe2e0f.png' width='500px'/>
|
10 |
+
|
11 |
+
|
12 |
+
<!--<h2> 😭 SadTalker: <span style="font-size:12px">Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation </span> </h2> -->
|
13 |
+
|
14 |
+
<a href='https://arxiv.org/abs/2211.12194'><img src='https://img.shields.io/badge/ArXiv-PDF-red'></a> <a href='https://sadtalker.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a> [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/vinthony/SadTalker) [![sd webui-colab](https://img.shields.io/badge/Automatic1111-Colab-green)](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb) <br> [![Replicate](https://replicate.com/cjwbw/sadtalker/badge)](https://replicate.com/cjwbw/sadtalker) [![Discord](https://dcbadge.vercel.app/api/server/rrayYqZ4tf?style=flat)](https://discord.gg/rrayYqZ4tf)
|
15 |
+
|
16 |
+
<div>
|
17 |
+
<a target='_blank'>Wenxuan Zhang <sup>*,1,2</sup> </a> 
|
18 |
+
<a href='https://vinthony.github.io/' target='_blank'>Xiaodong Cun <sup>*,2</sup></a> 
|
19 |
+
<a href='https://xuanwangvc.github.io/' target='_blank'>Xuan Wang <sup>3</sup></a> 
|
20 |
+
<a href='https://yzhang2016.github.io/' target='_blank'>Yong Zhang <sup>2</sup></a> 
|
21 |
+
<a href='https://xishen0220.github.io/' target='_blank'>Xi Shen <sup>2</sup></a>  </br>
|
22 |
+
<a href='https://yuguo-xjtu.github.io/' target='_blank'>Yu Guo<sup>1</sup> </a> 
|
23 |
+
<a href='https://scholar.google.com/citations?hl=zh-CN&user=4oXBp9UAAAAJ' target='_blank'>Ying Shan <sup>2</sup> </a> 
|
24 |
+
<a target='_blank'>Fei Wang <sup>1</sup> </a> 
|
25 |
+
</div>
|
26 |
+
<br>
|
27 |
+
<div>
|
28 |
+
<sup>1</sup> Xi'an Jiaotong University   <sup>2</sup> Tencent AI Lab   <sup>3</sup> Ant Group  
|
29 |
+
</div>
|
30 |
+
<br>
|
31 |
+
<i><strong><a href='https://arxiv.org/abs/2211.12194' target='_blank'>CVPR 2023</a></strong></i>
|
32 |
+
<br>
|
33 |
+
<br>
|
34 |
+
|
35 |
+
|
36 |
+
![sadtalker](https://user-images.githubusercontent.com/4397546/222490039-b1f6156b-bf00-405b-9fda-0c9a9156f991.gif)
|
37 |
+
|
38 |
+
<b>TL;DR: single portrait image 🙎♂️ + audio 🎤 = talking head video 🎞.</b>
|
39 |
+
|
40 |
+
<br>
|
41 |
+
|
42 |
+
</div>
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
## Highlights
|
47 |
+
|
48 |
+
- The license has been updated to Apache 2.0, and we've removed the non-commercial restriction
|
49 |
+
- **SadTalker has now officially been integrated into Discord, where you can use it for free by sending files. You can also generate high-quality videos from text prompts. Join: [![Discord](https://dcbadge.vercel.app/api/server/rrayYqZ4tf?style=flat)](https://discord.gg/rrayYqZ4tf)**
|
50 |
+
|
51 |
+
- We've published a [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) extension. Check out more details [here](docs/webui_extension.md). [Demo Video](https://user-images.githubusercontent.com/4397546/231495639-5d4bb925-ea64-4a36-a519-6389917dac29.mp4)
|
52 |
+
|
53 |
+
- Full image mode is now available! [More details...](https://github.com/OpenTalker/SadTalker#full-bodyimage-generation)
|
54 |
+
|
55 |
+
| still+enhancer in v0.0.1 | still + enhancer in v0.0.2 | [input image @bagbag1815](https://twitter.com/bagbag1815/status/1642754319094108161) |
|
56 |
+
|:--------------------: |:--------------------: | :----: |
|
57 |
+
| <video src="https://user-images.githubusercontent.com/48216707/229484996-5d7be64f-2553-4c9e-a452-c5cf0b8ebafe.mp4" type="video/mp4"> </video> | <video src="https://user-images.githubusercontent.com/4397546/230717873-355b7bf3-d3de-49f9-a439-9220e623fce7.mp4" type="video/mp4"> </video> | <img src='./examples/source_image/full_body_2.png' width='380'>
|
58 |
+
|
59 |
+
- Several new modes (Still, reference, and resize modes) are now available!
|
60 |
+
|
61 |
+
- We're happy to see more community demos on [bilibili](https://search.bilibili.com/all?keyword=sadtalker), [YouTube](https://www.youtube.com/results?search_query=sadtalker) and [X (#sadtalker)](https://twitter.com/search?q=%23sadtalker&src).
|
62 |
+
|
63 |
+
## Changelog
|
64 |
+
|
65 |
+
The previous changelog can be found [here](docs/changlelog.md).
|
66 |
+
|
67 |
+
- __[2023.06.12]__: Added more new features in WebUI extension, see the discussion [here](https://github.com/OpenTalker/SadTalker/discussions/386).
|
68 |
+
|
69 |
+
- __[2023.06.05]__: Released a new 512x512px (beta) face model. Fixed some bugs and improved performance.
|
70 |
+
|
71 |
+
- __[2023.04.15]__: Added a WebUI Colab notebook by [@camenduru](https://github.com/camenduru/): [![sd webui-colab](https://img.shields.io/badge/Automatic1111-Colab-green)](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb)
|
72 |
+
|
73 |
+
- __[2023.04.12]__: Added a more detailed WebUI installation document and fixed a problem when reinstalling.
|
74 |
+
|
75 |
+
- __[2023.04.12]__: Fixed WebUI safety issues caused by 3rd-party packages, and optimized the output path in `sd-webui-extension`.
|
76 |
+
|
77 |
+
- __[2023.04.08]__: In v0.0.2, we added a logo watermark to the generated video to prevent abuse. _This watermark has since been removed in a later release._
|
78 |
+
|
79 |
+
- __[2023.04.08]__: In v0.0.2, we added features for full image animation and a link to download checkpoints from Baidu. We also optimized the enhancer logic.
|
80 |
+
|
81 |
+
## To-Do
|
82 |
+
|
83 |
+
We're tracking new updates in [issue #280](https://github.com/OpenTalker/SadTalker/issues/280).
|
84 |
+
|
85 |
+
## Troubleshooting
|
86 |
+
|
87 |
+
If you have any problems, please read our [FAQs](docs/FAQ.md) before opening an issue.
|
88 |
+
|
89 |
+
|
90 |
+
|
91 |
+
## 1. Installation.
|
92 |
+
|
93 |
+
Community tutorials: [中文Windows教程 (Chinese Windows tutorial)](https://www.bilibili.com/video/BV1Dc411W7V6/) | [日本語コース (Japanese tutorial)](https://br-d.fanbox.cc/posts/5685086).
|
94 |
+
|
95 |
+
### Linux/Unix
|
96 |
+
|
97 |
+
1. Install [Anaconda](https://www.anaconda.com/), Python and `git`.
|
98 |
+
|
99 |
+
2. Create the environment and install the requirements.
|
100 |
+
```bash
|
101 |
+
git clone https://github.com/OpenTalker/SadTalker.git
|
102 |
+
|
103 |
+
cd SadTalker
|
104 |
+
|
105 |
+
conda create -n sadtalker python=3.8
|
106 |
+
|
107 |
+
conda activate sadtalker
|
108 |
+
|
109 |
+
pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
|
110 |
+
|
111 |
+
conda install ffmpeg
|
112 |
+
|
113 |
+
pip install -r requirements.txt
|
114 |
+
|
115 |
+
### Coqui TTS is optional for gradio demo.
|
116 |
+
### pip install TTS
|
117 |
+
|
118 |
+
```
|
119 |
+
### Windows
|
120 |
+
|
121 |
+
A video tutorial in Chinese is available [here](https://www.bilibili.com/video/BV1Dc411W7V6/). You can also follow the instructions below:
|
122 |
+
|
123 |
+
1. Install [Python 3.8](https://www.python.org/downloads/windows/) and check "Add Python to PATH".
|
124 |
+
2. Install [git](https://git-scm.com/download/win) manually or using [Scoop](https://scoop.sh/): `scoop install git`.
|
125 |
+
3. Install `ffmpeg`, following [this tutorial](https://www.wikihow.com/Install-FFmpeg-on-Windows) or using [scoop](https://scoop.sh/): `scoop install ffmpeg`.
|
126 |
+
4. Download the SadTalker repository by running `git clone https://github.com/OpenTalker/SadTalker.git`.
|
127 |
+
5. Download the checkpoints and gfpgan models in the [downloads section](#2-download-models).
|
128 |
+
6. Run `start.bat` from Windows Explorer as a normal (non-administrator) user, and a Gradio-powered WebUI demo will be started.
|
129 |
+
|
130 |
+
### macOS
|
131 |
+
|
132 |
+
A tutorial on installing SadTalker on macOS can be found [here](docs/install.md).
|
133 |
+
|
134 |
+
### Docker, WSL, etc
|
135 |
+
|
136 |
+
Please check out additional tutorials [here](docs/install.md).
|
137 |
+
|
138 |
+
## 2. Download Models
|
139 |
+
|
140 |
+
You can run the following script on Linux/macOS to automatically download all the models:
|
141 |
+
|
142 |
+
```bash
|
143 |
+
bash scripts/download_models.sh
|
144 |
+
```
|
145 |
+
|
146 |
+
We also provide an offline patch (`gfpgan/`), so no model will be downloaded when generating.
|
147 |
+
|
148 |
+
### Pre-Trained Models
|
149 |
+
|
150 |
+
* [Google Drive](https://drive.google.com/file/d/1gwWh45pF7aelNP_P78uDJL8Sycep-K7j/view?usp=sharing)
|
151 |
+
* [GitHub Releases](https://github.com/OpenTalker/SadTalker/releases)
|
152 |
+
* [Baidu (百度云盘)](https://pan.baidu.com/s/1kb1BCPaLOWX1JJb9Czbn6w?pwd=sadt) (Password: `sadt`)
|
153 |
+
|
154 |
+
<!-- TODO add Hugging Face links -->
|
155 |
+
|
156 |
+
### GFPGAN Offline Patch
|
157 |
+
|
158 |
+
* [Google Drive](https://drive.google.com/file/d/19AIBsmfcHW6BRJmeqSFlG5fL445Xmsyi?usp=sharing)
|
159 |
+
* [GitHub Releases](https://github.com/OpenTalker/SadTalker/releases)
|
160 |
+
* [Baidu (百度云盘)](https://pan.baidu.com/s/1P4fRgk9gaSutZnn8YW034Q?pwd=sadt) (Password: `sadt`)
|
161 |
+
|
162 |
+
<!-- TODO add Hugging Face links -->
|
163 |
+
|
164 |
+
|
165 |
+
<details><summary>Model Details</summary>
|
166 |
+
|
167 |
+
|
168 |
+
Model descriptions:
|
169 |
+
|
170 |
+
##### New version
|
171 |
+
| Model | Description
|
172 |
+
| :--- | :----------
|
173 |
+
|checkpoints/mapping_00229-model.pth.tar | Pre-trained MappingNet in Sadtalker.
|
174 |
+
|checkpoints/mapping_00109-model.pth.tar | Pre-trained MappingNet in Sadtalker.
|
175 |
+
|checkpoints/SadTalker_V0.0.2_256.safetensors | Packaged SadTalker checkpoints of the old version (256 face render).
|
176 |
+
|checkpoints/SadTalker_V0.0.2_512.safetensors | Packaged SadTalker checkpoints of the old version (512 face render).
|
177 |
+
|gfpgan/weights | Face detection and enhanced models used in `facexlib` and `gfpgan`.
|
178 |
+
|
179 |
+
|
180 |
+
##### Old version
|
181 |
+
| Model | Description
|
182 |
+
| :--- | :----------
|
183 |
+
|checkpoints/auido2exp_00300-model.pth | Pre-trained ExpNet in Sadtalker.
|
184 |
+
|checkpoints/auido2pose_00140-model.pth | Pre-trained PoseVAE in Sadtalker.
|
185 |
+
|checkpoints/mapping_00229-model.pth.tar | Pre-trained MappingNet in Sadtalker.
|
186 |
+
|checkpoints/mapping_00109-model.pth.tar | Pre-trained MappingNet in Sadtalker.
|
187 |
+
|checkpoints/facevid2vid_00189-model.pth.tar | Pre-trained face-vid2vid model from [the reappearance of face-vid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis).
|
188 |
+
|checkpoints/epoch_20.pth | Pre-trained 3DMM extractor in [Deep3DFaceReconstruction](https://github.com/microsoft/Deep3DFaceReconstruction).
|
189 |
+
|checkpoints/wav2lip.pth | Highly accurate lip-sync model in [Wav2lip](https://github.com/Rudrabha/Wav2Lip).
|
190 |
+
|checkpoints/shape_predictor_68_face_landmarks.dat | Face landmark model used in [dlib](http://dlib.net/).
|
191 |
+
|checkpoints/BFM | 3DMM library file.
|
192 |
+
|checkpoints/hub | Face detection models used in [face alignment](https://github.com/1adrianb/face-alignment).
|
193 |
+
|gfpgan/weights | Face detection and enhanced models used in `facexlib` and `gfpgan`.
|
194 |
+
|
195 |
+
The final folder will be shown as:
|
196 |
+
|
197 |
+
<img width="331" alt="image" src="https://user-images.githubusercontent.com/4397546/232511411-4ca75cbf-a434-48c5-9ae0-9009e8316484.png">
|
198 |
+
|
199 |
+
|
200 |
+
</details>
|
201 |
+
|
202 |
+
## 3. Quick Start
|
203 |
+
|
204 |
+
Please read our document on [best practices and configuration tips](docs/best_practice.md)
|
205 |
+
|
206 |
+
### WebUI Demos
|
207 |
+
|
208 |
+
**Online Demo**: [HuggingFace](https://huggingface.co/spaces/vinthony/SadTalker) | [SDWebUI-Colab](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb) | [Colab](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb)
|
209 |
+
|
210 |
+
**Local WebUI extension**: Please refer to [WebUI docs](docs/webui_extension.md).
|
211 |
+
|
212 |
+
**Local gradio demo (recommended)**: A Gradio instance similar to our [Hugging Face demo](https://huggingface.co/spaces/vinthony/SadTalker) can be run locally:
|
213 |
+
|
214 |
+
```bash
|
215 |
+
## You need to manually install TTS (https://github.com/coqui-ai/TTS) via `pip install tts` in advance.
|
216 |
+
python app_sadtalker.py
|
217 |
+
```
|
218 |
+
|
219 |
+
You can also start it more easily:
|
220 |
+
|
221 |
+
- Windows: just double-click `webui.bat`, the requirements will be installed automatically.
|
222 |
+
- Linux/Mac OS: run `bash webui.sh` to start the webui.
|
223 |
+
|
224 |
+
|
225 |
+
### CLI usage
|
226 |
+
|
227 |
+
##### Animating a portrait image from default config:
|
228 |
+
```bash
|
229 |
+
python inference.py --driven_audio <audio.wav> \
|
230 |
+
--source_image <video.mp4 or picture.png> \
|
231 |
+
--enhancer gfpgan
|
232 |
+
```
|
233 |
+
The results will be saved in `results/$SOME_TIMESTAMP/*.mp4`.
|
234 |
+
|
235 |
+
##### Full body/image Generation:
|
236 |
+
|
237 |
+
Use `--still` to generate a natural full-body video. You can add `--enhancer` to improve the quality of the generated video.
|
238 |
+
|
239 |
+
```bash
|
240 |
+
python inference.py --driven_audio <audio.wav> \
|
241 |
+
--source_image <video.mp4 or picture.png> \
|
242 |
+
--result_dir <a folder to store results> \
|
243 |
+
--still \
|
244 |
+
--preprocess full \
|
245 |
+
--enhancer gfpgan
|
246 |
+
```
|
247 |
+
|
248 |
+
More examples, configuration options, and tips can be found in the [ >>> best practice documents <<<](docs/best_practice.md).
|
249 |
+
|
250 |
+
## Citation
|
251 |
+
|
252 |
+
If you find our work useful in your research, please consider citing:
|
253 |
+
|
254 |
+
```bibtex
|
255 |
+
@article{zhang2022sadtalker,
|
256 |
+
title={SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation},
|
257 |
+
author={Zhang, Wenxuan and Cun, Xiaodong and Wang, Xuan and Zhang, Yong and Shen, Xi and Guo, Yu and Shan, Ying and Wang, Fei},
|
258 |
+
journal={arXiv preprint arXiv:2211.12194},
|
259 |
+
year={2022}
|
260 |
+
}
|
261 |
+
```
|
262 |
+
|
263 |
+
## Acknowledgements
|
264 |
+
|
265 |
+
Facerender code borrows heavily from [zhanglonghao's reproduction of face-vid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis) and [PIRender](https://github.com/RenYurui/PIRender). We thank the authors for sharing their wonderful code. In the training process, we also used the models from [Deep3DFaceReconstruction](https://github.com/microsoft/Deep3DFaceReconstruction) and [Wav2lip](https://github.com/Rudrabha/Wav2Lip). We thank them for their wonderful work.
|
266 |
+
|
267 |
+
We also use the following 3rd-party libraries:
|
268 |
+
|
269 |
+
- **Face Utils**: https://github.com/xinntao/facexlib
|
270 |
+
- **Face Enhancement**: https://github.com/TencentARC/GFPGAN
|
271 |
+
- **Image/Video Enhancement**: https://github.com/xinntao/Real-ESRGAN
|
272 |
+
|
273 |
+
## Extensions:
|
274 |
+
|
275 |
+
- [SadTalker-Video-Lip-Sync](https://github.com/Zz-ww/SadTalker-Video-Lip-Sync) from [@Zz-ww](https://github.com/Zz-ww): SadTalker for Video Lip Editing
|
276 |
+
|
277 |
+
## Related Works
|
278 |
+
- [StyleHEAT: One-Shot High-Resolution Editable Talking Face Generation via Pre-trained StyleGAN (ECCV 2022)](https://github.com/FeiiYin/StyleHEAT)
|
279 |
+
- [CodeTalker: Speech-Driven 3D Facial Animation with Discrete Motion Prior (CVPR 2023)](https://github.com/Doubiiu/CodeTalker)
|
280 |
+
- [VideoReTalking: Audio-based Lip Synchronization for Talking Head Video Editing In the Wild (SIGGRAPH Asia 2022)](https://github.com/vinthony/video-retalking)
|
281 |
+
- [DPE: Disentanglement of Pose and Expression for General Video Portrait Editing (CVPR 2023)](https://github.com/Carlyx/DPE)
|
282 |
+
- [3D GAN Inversion with Facial Symmetry Prior (CVPR 2023)](https://github.com/FeiiYin/SPI/)
|
283 |
+
- [T2M-GPT: Generating Human Motion from Textual Descriptions with Discrete Representations (CVPR 2023)](https://github.com/Mael-zys/T2M-GPT)
|
284 |
+
|
285 |
+
## Disclaimer
|
286 |
+
|
287 |
+
This is not an official product of Tencent.
|
288 |
+
|
289 |
+
```
|
290 |
+
1. Please carefully read and comply with the open-source license applicable to this code before using it.
|
291 |
+
2. Please carefully read and comply with the intellectual property declaration applicable to this code before using it.
|
292 |
+
3. This open-source code runs completely offline and does not collect any personal information or other data. If you use this code to provide services to end-users and collect related data, please take necessary compliance measures according to applicable laws and regulations (such as publishing privacy policies, adopting necessary data security strategies, etc.). If the collected data involves personal information, user consent must be obtained (if applicable). Any legal liabilities arising from this are unrelated to Tencent.
|
293 |
+
4. Without Tencent's written permission, you are not authorized to use the names or logos legally owned by Tencent, such as "Tencent." Otherwise, you may be liable for legal responsibilities.
|
294 |
+
5. This open-source code does not have the ability to directly provide services to end-users. If you need to use this code for further model training or demos, as part of your product to provide services to end-users, or for similar use, please comply with applicable laws and regulations for your product or service. Any legal liabilities arising from this are unrelated to Tencent.
|
295 |
+
6. It is prohibited to use this open-source code for activities that harm the legitimate rights and interests of others (including but not limited to fraud, deception, infringement of others' portrait rights, reputation rights, etc.), or other behaviors that violate applicable laws and regulations or go against social ethics and good customs (including providing incorrect or false information, spreading pornographic, terrorist, and violent information, etc.). Otherwise, you may be liable for legal responsibilities.
|
296 |
+
```
|
297 |
+
|
298 |
+
LOGO: color and font suggestion: [ChatGPT](https://chat.openai.com), logo font: [Montserrat Alternates
|
299 |
+
](https://fonts.google.com/specimen/Montserrat+Alternates?preview.text=SadTalker&preview.text_type=custom&query=mont).
|
300 |
+
|
301 |
+
The copyrights of all demo images and audio belong to community users, or the material was generated with Stable Diffusion. Feel free to contact us if you would like us to remove them.
|
302 |
+
|
303 |
+
|
304 |
+
<!-- Spelling fixed on Tuesday, September 12, 2023 by @fakerybakery (https://github.com/fakerybakery). These changes are licensed under the Apache 2.0 license. -->
|
app_sadtalker.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, sys
|
2 |
+
import gradio as gr
|
3 |
+
from src.gradio_demo import SadTalker
|
4 |
+
|
5 |
+
|
6 |
+
try:
|
7 |
+
import webui # in webui
|
8 |
+
in_webui = True
|
9 |
+
except:
|
10 |
+
in_webui = False
|
11 |
+
|
12 |
+
|
13 |
+
def toggle_audio_file(choice):
|
14 |
+
if choice == False:
|
15 |
+
return gr.update(visible=True), gr.update(visible=False)
|
16 |
+
else:
|
17 |
+
return gr.update(visible=False), gr.update(visible=True)
|
18 |
+
|
19 |
+
def ref_video_fn(path_of_ref_video):
|
20 |
+
if path_of_ref_video is not None:
|
21 |
+
return gr.update(value=True)
|
22 |
+
else:
|
23 |
+
return gr.update(value=False)
|
24 |
+
|
25 |
+
def sadtalker_demo(checkpoint_path='checkpoints', config_path='src/config', warpfn=None):
|
26 |
+
|
27 |
+
sad_talker = SadTalker(checkpoint_path, config_path, lazy_load=True)
|
28 |
+
|
29 |
+
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
|
30 |
+
gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
|
31 |
+
<a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> \
|
32 |
+
<a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> \
|
33 |
+
<a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
|
34 |
+
|
35 |
+
with gr.Row().style(equal_height=False):
|
36 |
+
with gr.Column(variant='panel'):
|
37 |
+
with gr.Tabs(elem_id="sadtalker_source_image"):
|
38 |
+
with gr.TabItem('Upload image'):
|
39 |
+
with gr.Row():
|
40 |
+
source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
|
41 |
+
|
42 |
+
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
43 |
+
with gr.TabItem('Upload OR TTS'):
|
44 |
+
with gr.Column(variant='panel'):
|
45 |
+
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
46 |
+
|
47 |
+
if sys.platform != 'win32' and not in_webui:
|
48 |
+
from src.utils.text2speech import TTSTalker
|
49 |
+
tts_talker = TTSTalker()
|
50 |
+
with gr.Column(variant='panel'):
|
51 |
+
input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
|
52 |
+
tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
|
53 |
+
tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
|
54 |
+
|
55 |
+
with gr.Column(variant='panel'):
|
56 |
+
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
57 |
+
with gr.TabItem('Settings'):
|
58 |
+
gr.Markdown("need help? please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more detials")
|
59 |
+
with gr.Column(variant='panel'):
|
60 |
+
# width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
|
61 |
+
# height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
|
62 |
+
pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) #
|
63 |
+
size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
|
64 |
+
preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
|
65 |
+
is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
|
66 |
+
batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2)
|
67 |
+
enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
|
68 |
+
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
|
69 |
+
|
70 |
+
with gr.Tabs(elem_id="sadtalker_genearted"):
|
71 |
+
gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
|
72 |
+
|
73 |
+
if warpfn:
|
74 |
+
submit.click(
|
75 |
+
fn=warpfn(sad_talker.test),
|
76 |
+
inputs=[source_image,
|
77 |
+
driven_audio,
|
78 |
+
preprocess_type,
|
79 |
+
is_still_mode,
|
80 |
+
enhancer,
|
81 |
+
batch_size,
|
82 |
+
size_of_image,
|
83 |
+
pose_style
|
84 |
+
],
|
85 |
+
outputs=[gen_video]
|
86 |
+
)
|
87 |
+
else:
|
88 |
+
submit.click(
|
89 |
+
fn=sad_talker.test,
|
90 |
+
inputs=[source_image,
|
91 |
+
driven_audio,
|
92 |
+
preprocess_type,
|
93 |
+
is_still_mode,
|
94 |
+
enhancer,
|
95 |
+
batch_size,
|
96 |
+
size_of_image,
|
97 |
+
pose_style
|
98 |
+
],
|
99 |
+
outputs=[gen_video]
|
100 |
+
)
|
101 |
+
|
102 |
+
return sadtalker_interface
|
103 |
+
|
104 |
+
|
105 |
+
if __name__ == "__main__":
|
106 |
+
demo = sadtalker_demo()
|
107 |
+
demo.queue()
|
108 |
+
demo.launch(share=True)
|
109 |
+
demo.deploy(name="NamastexLabs_SadTalker")
|
checkpoints/SadTalker_V0.0.2_256.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c211f5d6de003516bf1bbda9f47049a4c9c99133b1ab565c6961e5af16477bff
|
3 |
+
size 725066984
|
checkpoints/SadTalker_V0.0.2_512.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e063f7ff5258240bdb0f7690783a7b1374e6a4a81ce8fa33456f4cd49694340
|
3 |
+
size 725066984
|
checkpoints/mapping_00109-model.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84a8642468a3fcfdd9ab6be955267043116c2bec2284686a5262f1eaf017f64c
|
3 |
+
size 155779231
|
checkpoints/mapping_00229-model.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
|
3 |
+
size 155521183
|
cog.yaml
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
build:
|
2 |
+
gpu: true
|
3 |
+
cuda: "11.3"
|
4 |
+
python_version: "3.8"
|
5 |
+
system_packages:
|
6 |
+
- "ffmpeg"
|
7 |
+
- "libgl1-mesa-glx"
|
8 |
+
- "libglib2.0-0"
|
9 |
+
python_packages:
|
10 |
+
- "torch==1.12.1"
|
11 |
+
- "torchvision==0.13.1"
|
12 |
+
- "torchaudio==0.12.1"
|
13 |
+
- "joblib==1.1.0"
|
14 |
+
- "scikit-image==0.19.3"
|
15 |
+
- "basicsr==1.4.2"
|
16 |
+
- "facexlib==0.3.0"
|
17 |
+
- "resampy==0.3.1"
|
18 |
+
- "pydub==0.25.1"
|
19 |
+
- "scipy==1.10.1"
|
20 |
+
- "kornia==0.6.8"
|
21 |
+
- "face_alignment==1.3.5"
|
22 |
+
- "imageio==2.19.3"
|
23 |
+
- "imageio-ffmpeg==0.4.7"
|
24 |
+
- "librosa==0.9.2" #
|
25 |
+
- "tqdm==4.65.0"
|
26 |
+
- "yacs==0.1.8"
|
27 |
+
- "gfpgan==1.3.8"
|
28 |
+
- "dlib-bin==19.24.1"
|
29 |
+
- "av==10.0.0"
|
30 |
+
- "trimesh==3.9.20"
|
31 |
+
run:
|
32 |
+
- mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/s3fd-619a316812.pth" "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
|
33 |
+
- mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/2DFAN4-cd938726ad.zip" "https://www.adrianbulat.com/downloads/python-fan/2DFAN4-cd938726ad.zip"
|
34 |
+
|
35 |
+
predict: "predict.py:Predictor"
|
docs/FAQ.md
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
## Frequently Asked Questions
|
3 |
+
|
4 |
+
**Q: `ffmpeg` is not recognized as an internal or external command**
|
5 |
+
|
6 |
+
On Linux, you can install ffmpeg via `conda install ffmpeg`. On macOS, install it via `brew install ffmpeg`. On Windows, follow [this](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) guide to install `ffmpeg`, and make sure `ffmpeg` is in your `%PATH%` as suggested in [#54](https://github.com/Winfredy/SadTalker/issues/54).
|
7 |
+
|
8 |
+
**Q: Running Requirements.**
|
9 |
+
|
10 |
+
Please refer to the discussion here: https://github.com/Winfredy/SadTalker/issues/124#issuecomment-1508113989
|
11 |
+
|
12 |
+
|
13 |
+
**Q: ModuleNotFoundError: No module named 'ai'**
|
14 |
+
|
15 |
+
Please check the size of the `epoch_20.pth` checkpoint. (https://github.com/Winfredy/SadTalker/issues/167, https://github.com/Winfredy/SadTalker/issues/113)
|
16 |
+
|
17 |
+
**Q: Illegal Hardware Error: Mac M1**
|
18 |
+
|
19 |
+
Please reinstall `dlib` separately via `pip install dlib`. (https://github.com/Winfredy/SadTalker/issues/129, https://github.com/Winfredy/SadTalker/issues/109)
|
20 |
+
|
21 |
+
|
22 |
+
**Q: FileNotFoundError: [Errno 2] No such file or directory: checkpoints\BFM_Fitting\similarity_Lm3D_all.mat**
|
23 |
+
|
24 |
+
Make sure you have downloaded the checkpoints and gfpgan as [here](https://github.com/Winfredy/SadTalker#-2-download-trained-models) and placed them in the right place.
|
25 |
+
|
26 |
+
**Q: RuntimeError: unexpected EOF, expected 237192 more bytes. The file might be corrupted.**
|
27 |
+
|
28 |
+
The files are not automatically downloaded. Please update the code and download the gfpgan folders as [here](https://github.com/Winfredy/SadTalker#-2-download-trained-models).
|
29 |
+
|
30 |
+
**Q: CUDA out of memory error**
|
31 |
+
|
32 |
+
please refer to https://stackoverflow.com/questions/73747731/runtimeerror-cuda-out-of-memory-how-setting-max-split-size-mb
|
33 |
+
|
34 |
+
```
|
35 |
+
# windows
|
36 |
+
set PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
|
37 |
+
python inference.py ...
|
38 |
+
|
39 |
+
# linux
|
40 |
+
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
|
41 |
+
python inference.py ...
|
42 |
+
```
|
43 |
+
|
44 |
+
**Q: Error while decoding stream #0:0: Invalid data found when processing input [mp3float @ 0000015037628c00] Header missing**
|
45 |
+
|
46 |
+
Our method only supports wav or mp3 files as input; please make sure the audio you feed in is in one of these formats.
|
docs/best_practice.md
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Best Practices and Tips for configuration
|
2 |
+
|
3 |
+
> Our model only works on REAL people or portrait images similar to a REAL person. The anime talking head generation method will be released in the future.
|
4 |
+
|
5 |
+
Advanced configuration options for `inference.py`:
|
6 |
+
|
7 |
+
| Name | Configuration | default | Explanation |
|
8 |
+
|:------------- |:------------- |:----- | :------------- |
|
9 |
+
| Enhance Mode | `--enhancer` | None | Using `gfpgan` or `RestoreFormer` to enhance the generated face via face restoration network
|
10 |
+
| Background Enhancer | `--background_enhancer` | None | Using `realesrgan` to enhance the full video.
|
11 |
+
| Still Mode | ` --still` | False | Using the same pose parameters as the original image, fewer head motion.
|
12 |
+
| Expressive Mode | `--expression_scale` | 1.0 | a larger value will make the expression motion stronger.
|
13 |
+
| save path | `--result_dir` |`./results` | The file will be saved in the new location.
|
14 |
+
| preprocess | `--preprocess` | `crop` | Run and produce the results in the cropped input image. Other choices: `resize`, where the images will be resized to the specific resolution; `full`, which runs the full image animation (use with `--still` to get better results).
|
15 |
+
| ref Mode (eye) | `--ref_eyeblink` | None | A video path, where we borrow the eyeblink from this reference video to provide more natural eyebrow movement.
|
16 |
+
| ref Mode (pose) | `--ref_pose` | None | A video path, where we borrow the pose from the head reference video.
|
17 |
+
| 3D Mode | `--face3dvis` | False | Needs additional installation. More details on generating the 3D face can be found [here](face3d.md).
|
18 |
+
| free-view Mode | `--input_yaw`,<br> `--input_pitch`,<br> `--input_roll` | None | Generating novel view or free-view 4D talking head from a single image. More details can be found [here](https://github.com/Winfredy/SadTalker#generating-4d-free-view-talking-examples-from-audio-and-a-single-image).
|
19 |
+
|
20 |
+
|
21 |
+
### About `--preprocess`
|
22 |
+
|
23 |
+
Our system automatically handles the input images via `crop`, `resize` and `full`.
|
24 |
+
|
25 |
+
In `crop` mode, we only generate the cropped image via the facial keypoints and generate the animated facial avatar. The animation of both expression and head pose is realistic.
|
26 |
+
|
27 |
+
> Still mode will stop the eyeblink and head pose movement.
|
28 |
+
|
29 |
+
| [input image @bagbag1815](https://twitter.com/bagbag1815/status/1642754319094108161) | crop | crop w/still |
|
30 |
+
|:--------------------: |:--------------------: | :----: |
|
31 |
+
| <img src='../examples/source_image/full_body_2.png' width='380'> | ![full_body_2](example_crop.gif) | ![full_body_2](example_crop_still.gif) |
|
32 |
+
|
33 |
+
|
34 |
+
In `resize` mode, we resize the whole image to generate the full talking head video. Thus, an image similar to an ID photo can be produced. ⚠️ It will produce bad results for full person images.
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
| <img src='../examples/source_image/full_body_2.png' width='380'> | <img src='../examples/source_image/full4.jpeg' width='380'> |
|
40 |
+
|:--------------------: |:--------------------: |
|
41 |
+
| ❌ not suitable for resize mode | ✅ good for resize mode |
|
42 |
+
| <img src='resize_no.gif'> | <img src='resize_good.gif' width='380'> |
|
43 |
+
|
44 |
+
In `full` mode, our model will automatically process the cropped region and paste it back into the original image. Remember to use `--still` to keep the original head pose.
|
45 |
+
|
46 |
+
| input | `--still` | `--still` & `enhancer` |
|
47 |
+
|:--------------------: |:--------------------: | :--:|
|
48 |
+
| <img src='../examples/source_image/full_body_2.png' width='380'> | <img src='./example_full.gif' width='380'> | <img src='./example_full_enhanced.gif' width='380'>
|
49 |
+
|
50 |
+
|
51 |
+
### About `--enhancer`
|
52 |
+
|
53 |
+
For higher resolution, we integrate [gfpgan](https://github.com/TencentARC/GFPGAN) and [real-esrgan](https://github.com/xinntao/Real-ESRGAN) for different purposes. Just add `--enhancer <gfpgan or RestoreFormer>` or `--background_enhancer <realesrgan>` to enhance the face or the full image, respectively.
|
54 |
+
|
55 |
+
```bash
|
56 |
+
# make sure above packages are available:
|
57 |
+
pip install gfpgan
|
58 |
+
pip install realesrgan
|
59 |
+
```
|
60 |
+
|
61 |
+
### About `--face3dvis`
|
62 |
+
|
63 |
+
This flag indicates that we can generate the 3D-rendered face and its 3D facial landmarks. More details can be found [here](face3d.md).
|
64 |
+
|
65 |
+
| Input | Animated 3d face |
|
66 |
+
|:-------------: | :-------------: |
|
67 |
+
| <img src='../examples/source_image/art_0.png' width='200px'> | <video src="https://user-images.githubusercontent.com/4397546/226856847-5a6a0a4d-a5ec-49e2-9b05-3206db65e8e3.mp4"></video> |
|
68 |
+
|
69 |
+
> Kindly ensure to activate the audio as the default audio playing is incompatible with GitHub.
|
70 |
+
|
71 |
+
|
72 |
+
|
73 |
+
#### Reference eye-blink mode.
|
74 |
+
|
75 |
+
| Input, w/ reference video , reference video |
|
76 |
+
|:-------------: |
|
77 |
+
| ![free_view](using_ref_video.gif)|
|
78 |
+
| If the reference video is shorter than the input audio, we will loop the reference video .
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
+
#### Generating 4D free-view talking examples from audio and a single image
|
83 |
+
|
84 |
+
We use `input_yaw`, `input_pitch`, `input_roll` to control head pose. For example, `--input_yaw -20 30 10` means the input head yaw degree changes from -20 to 30 and then changes from 30 to 10.
|
85 |
+
```bash
|
86 |
+
python inference.py --driven_audio <audio.wav> \
|
87 |
+
--source_image <video.mp4 or picture.png> \
|
88 |
+
--result_dir <a folder to store results> \
|
89 |
+
--input_yaw -20 30 10
|
90 |
+
```
|
91 |
+
|
92 |
+
| Results, Free-view results, Novel view results |
|
93 |
+
|:-------------: |
|
94 |
+
| ![free_view](free_view_result.gif)|
|
docs/changlelog.md
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## changelogs
|
2 |
+
|
3 |
+
|
4 |
+
- __[2023.04.06]__: stable-diffusion webui extension is released.
|
5 |
+
|
6 |
+
- __[2023.04.03]__: Enable TTS in huggingface and gradio local demo.
|
7 |
+
|
8 |
+
- __[2023.03.30]__: Launch beta version of the full body mode.
|
9 |
+
|
10 |
+
- __[2023.03.30]__: Launch new feature: through using reference videos, our algorithm can generate videos with more natural eye blinking and some eyebrow movement.
|
11 |
+
|
12 |
+
- __[2023.03.29]__: `resize mode` is online by `python inference.py --preprocess resize`! This lets us produce a larger crop of the image, as discussed in https://github.com/Winfredy/SadTalker/issues/35.
|
13 |
+
|
14 |
+
- __[2023.03.29]__: local gradio demo is online! `python app.py` to start the demo. New `requirements.txt` is used to avoid the bugs in `librosa`.
|
15 |
+
|
16 |
+
- __[2023.03.28]__: Online demo is launched in [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/vinthony/SadTalker), thanks AK!
|
17 |
+
|
18 |
+
- __[2023.03.22]__: Launch new feature: generating the 3d face animation from a single image. New applications about it will be updated.
|
19 |
+
|
20 |
+
- __[2023.03.22]__: Launch new feature: `still mode`, where only a small head pose will be produced via `python inference.py --still`.
|
21 |
+
|
22 |
+
- __[2023.03.18]__: Support `expression intensity`, now you can change the intensity of the generated motion: `python inference.py --expression_scale 1.3 (some value > 1)`.
|
23 |
+
|
24 |
+
- __[2023.03.18]__: Reconfig the data folders, now you can download the checkpoint automatically using `bash scripts/download_models.sh`.
|
25 |
+
- __[2023.03.18]__: We have officially integrated [GFPGAN](https://github.com/TencentARC/GFPGAN) for face enhancement; use `python inference.py --enhancer gfpgan` for better visualization performance.
|
26 |
+
- __[2023.03.14]__: Specify the version of package `joblib` to remove the errors in using `librosa`, [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb) is online!
|
27 |
+
- __[2023.03.06]__: Solve some bugs in code and errors in installation
|
28 |
+
- __[2023.03.03]__: Release the test code for audio-driven single image animation!
|
29 |
+
- __[2023.02.28]__: SadTalker has been accepted by CVPR 2023!
|
docs/example_crop.gif
ADDED
![]() |
Git LFS Details
|
docs/example_crop_still.gif
ADDED
![]() |
Git LFS Details
|
docs/example_full.gif
ADDED
![]() |
Git LFS Details
|
docs/example_full_crop.gif
ADDED
![]() |
docs/example_full_enhanced.gif
ADDED
![]() |
Git LFS Details
|
docs/face3d.md
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## 3D Face Visualization
|
2 |
+
|
3 |
+
We use `pytorch3d` to visualize the 3D faces from a single image.
|
4 |
+
|
5 |
+
The requirements for 3D visualization are difficult to install, so here's a tutorial:
|
6 |
+
|
7 |
+
```bash
|
8 |
+
git clone https://github.com/OpenTalker/SadTalker.git
|
9 |
+
cd SadTalker
|
10 |
+
conda create -n sadtalker3d python=3.8
|
11 |
+
source activate sadtalker3d
|
12 |
+
|
13 |
+
conda install ffmpeg
|
14 |
+
conda install -c fvcore -c iopath -c conda-forge fvcore iopath
|
15 |
+
conda install libgcc gmp
|
16 |
+
|
17 |
+
pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
|
18 |
+
|
19 |
+
# install pytorch3d
|
20 |
+
pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1110/download.html
|
21 |
+
|
22 |
+
pip install -r requirements3d.txt
|
23 |
+
|
24 |
+
### install gfpgan for enhancer
|
25 |
+
pip install git+https://github.com/TencentARC/GFPGAN
|
26 |
+
|
27 |
+
|
28 |
+
### if a gcc version problem occurs at `from pytorch import _C` in pytorch3d, add the anaconda lib path to LD_LIBRARY_PATH
|
29 |
+
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/$YOUR_ANACONDA_PATH/lib/
|
30 |
+
|
31 |
+
```
|
32 |
+
|
33 |
+
Then, generate the result via:
|
34 |
+
|
35 |
+
```bash
|
36 |
+
|
37 |
+
|
38 |
+
python inference.py --driven_audio <audio.wav> \
|
39 |
+
--source_image <video.mp4 or picture.png> \
|
40 |
+
--result_dir <a folder to store results> \
|
41 |
+
--face3dvis
|
42 |
+
|
43 |
+
```
|
44 |
+
|
45 |
+
The results will appear, named `face3d.mp4`.
|
46 |
+
|
47 |
+
More applications about 3D face rendering will be released soon.
|
docs/free_view_result.gif
ADDED
![]() |
Git LFS Details
|
docs/install.md
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### macOS
|
2 |
+
|
3 |
+
This method has been tested on an M1 Mac (macOS 13.3).
|
4 |
+
|
5 |
+
```bash
|
6 |
+
git clone https://github.com/OpenTalker/SadTalker.git
|
7 |
+
cd SadTalker
|
8 |
+
conda create -n sadtalker python=3.8
|
9 |
+
conda activate sadtalker
|
10 |
+
# install pytorch 2.0
|
11 |
+
pip install torch torchvision torchaudio
|
12 |
+
conda install ffmpeg
|
13 |
+
pip install -r requirements.txt
|
14 |
+
pip install dlib # macOS needs to install the original dlib.
|
15 |
+
```
|
16 |
+
|
17 |
+
### Windows Native
|
18 |
+
|
19 |
+
- Make sure `ffmpeg` is on your `%PATH%`, as suggested in [#54](https://github.com/Winfredy/SadTalker/issues/54). Follow [this](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) tutorial to install `ffmpeg`, or install it with scoop.
|
20 |
+
|
21 |
+
|
22 |
+
### Windows WSL
|
23 |
+
|
24 |
+
|
25 |
+
- Make sure the environment variable is set: `export LD_LIBRARY_PATH=/usr/lib/wsl/lib:$LD_LIBRARY_PATH`
|
26 |
+
|
27 |
+
|
28 |
+
### Docker Installation
|
29 |
+
|
30 |
+
A community Docker image by [@thegenerativegeneration](https://github.com/thegenerativegeneration) is available on the [Docker hub](https://hub.docker.com/repository/docker/wawa9000/sadtalker), which can be used directly:
|
31 |
+
```bash
|
32 |
+
docker run --gpus "all" --rm -v $(pwd):/host_dir wawa9000/sadtalker \
|
33 |
+
--driven_audio /host_dir/deyu.wav \
|
34 |
+
--source_image /host_dir/image.jpg \
|
35 |
+
--expression_scale 1.0 \
|
36 |
+
--still \
|
37 |
+
--result_dir /host_dir
|
38 |
+
```
|
39 |
+
|
docs/resize_good.gif
ADDED
![]() |
Git LFS Details
|
docs/resize_no.gif
ADDED
![]() |
Git LFS Details
|
docs/sadtalker_logo.png
ADDED
![]() |
docs/using_ref_video.gif
ADDED
![]() |
Git LFS Details
|
docs/webui_extension.md
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Run SadTalker as a Stable Diffusion WebUI Extension.
|
2 |
+
|
3 |
+
1. Install the latest version of [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and install SadTalker via `extension`.
|
4 |
+
<img width="726" alt="image" src="https://user-images.githubusercontent.com/4397546/230698519-267d1d1f-6e99-4dd4-81e1-7b889259efbd.png">
|
5 |
+
|
6 |
+
2. Download the checkpoints manually, for Linux and Mac:
|
7 |
+
|
8 |
+
```bash
|
9 |
+
|
10 |
+
cd SOMEWHERE_YOU_LIKE
|
11 |
+
|
12 |
+
bash <(wget -qO- https://raw.githubusercontent.com/OpenTalker/SadTalker/main/scripts/download_models.sh)
|
13 |
+
```
|
14 |
+
|
15 |
+
For Windows, you can download all the checkpoints [here](https://github.com/OpenTalker/SadTalker/tree/main#2-download-models).
|
16 |
+
|
17 |
+
3.1. Option 1: put the checkpoint in `stable-diffusion-webui/models/SadTalker` or `stable-diffusion-webui/extensions/SadTalker/checkpoints/`, the checkpoints will be detected automatically.
|
18 |
+
|
19 |
+
3.2. Option 2: Set the path of `SADTALKER_CHECKPOINTS` in `webui_user.sh` (Linux) or `webui_user.bat` (Windows) by:
|
20 |
+
|
21 |
+
> only works if you are directly starting webui from `webui_user.sh` or `webui_user.bat`.
|
22 |
+
|
23 |
+
```bash
|
24 |
+
# Windows (webui_user.bat)
|
25 |
+
set SADTALKER_CHECKPOINTS=D:\SadTalker\checkpoints
|
26 |
+
|
27 |
+
# Linux/macOS (webui_user.sh)
|
28 |
+
export SADTALKER_CHECKPOINTS=/path/to/SadTalker/checkpoints
|
29 |
+
```
|
30 |
+
|
31 |
+
4. Start the WebUI via `webui.sh or webui_user.sh(linux)` or `webui_user.bat(windows)` or any other method. SadTalker can also be used in stable-diffusion-webui directly.
|
32 |
+
|
33 |
+
<img width="726" alt="image" src="https://user-images.githubusercontent.com/4397546/230698614-58015182-2916-4240-b324-e69022ef75b3.png">
|
34 |
+
|
35 |
+
## Questions
|
36 |
+
|
37 |
+
1. If you are running on CPU, you need to specify `--disable-safe-unpickle` in `webui_user.sh` or `webui_user.bat`.
|
38 |
+
|
39 |
+
```bash
|
40 |
+
# windows (webui_user.bat)
|
41 |
+
set COMMANDLINE_ARGS="--disable-safe-unpickle"
|
42 |
+
|
43 |
+
# linux (webui_user.sh)
|
44 |
+
export COMMANDLINE_ARGS="--disable-safe-unpickle"
|
45 |
+
```
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
(If you're unable to use the `full` mode, please read this [discussion](https://github.com/Winfredy/SadTalker/issues/78).)
|
examples/driven_audio/RD_Radio31_000.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/RD_Radio34_002.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/RD_Radio36_000.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/RD_Radio40_000.wav
ADDED
Binary file (512 kB). View file
|
|
examples/driven_audio/bus_chinese.wav
ADDED
Binary file (652 kB). View file
|
|
examples/driven_audio/chinese_news.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b0f4d313a1ca671bc4831d60bcf0c12225efbffe6c0e93e54fbfe9bcd4021cb
|
3 |
+
size 1536078
|
examples/driven_audio/chinese_poem1.wav
ADDED
Binary file (263 kB). View file
|
|
examples/driven_audio/chinese_poem2.wav
ADDED
Binary file (461 kB). View file
|
|
examples/driven_audio/deyu.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba1839c57770a2ab0b593ce814344bfd4d750da02acc9be9e8cf5b9113a0f88a
|
3 |
+
size 2694784
|
examples/driven_audio/eluosi.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4a3593815dc7b68c256672baa61934c9479efa770af2065fb0886f02713606e
|
3 |
+
size 1786672
|
examples/driven_audio/fayu.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16ebd13626ae4171030b4ea05cceef06078483c352e4b68d469fc2a52bfffceb
|
3 |
+
size 1940428
|
examples/driven_audio/imagine.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2db410217e074d91ae6011e1c5dc0b94f02d05d381c50af8e54253eeacad17d2
|
3 |
+
size 1618510
|
examples/driven_audio/itosinger1.wav
ADDED
Binary file (500 kB). View file
|
|
examples/driven_audio/japanese.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3db5426d0b158799e2be4f609b11f75bfbd4affffe18e9a1c8e6f241fcdedcfc
|
3 |
+
size 2622712
|
examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a85242c3fc4d50e2202cea393b9e7ee59019759b68e78e26a254d528c22615a7
|
3 |
+
size 2257667
|
examples/ref_video/WDA_KatieHill_000.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fbb4cfd64eedc49b170c441714a9c4fd5e2c2f8a11592070ad89fbd257f2817
|
3 |
+
size 3548230
|
examples/source_image/art_0.png
ADDED
![]() |
examples/source_image/art_1.png
ADDED
![]() |
examples/source_image/art_10.png
ADDED
![]() |
examples/source_image/art_11.png
ADDED
![]() |
examples/source_image/art_12.png
ADDED
![]() |
examples/source_image/art_13.png
ADDED
![]() |
examples/source_image/art_14.png
ADDED
![]() |
examples/source_image/art_15.png
ADDED
![]() |