jmanhype commited on
Commit
22541b3
·
1 Parent(s): ca17ccd

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +27 -0
  2. .gitignore +174 -0
  3. LICENSE +209 -0
  4. README.md +299 -7
  5. app_sadtalker.py +109 -0
  6. checkpoints/SadTalker_V0.0.2_256.safetensors +3 -0
  7. checkpoints/SadTalker_V0.0.2_512.safetensors +3 -0
  8. checkpoints/mapping_00109-model.pth.tar +3 -0
  9. checkpoints/mapping_00229-model.pth.tar +3 -0
  10. cog.yaml +35 -0
  11. docs/FAQ.md +46 -0
  12. docs/best_practice.md +94 -0
  13. docs/changlelog.md +29 -0
  14. docs/example_crop.gif +3 -0
  15. docs/example_crop_still.gif +3 -0
  16. docs/example_full.gif +3 -0
  17. docs/example_full_crop.gif +0 -0
  18. docs/example_full_enhanced.gif +3 -0
  19. docs/face3d.md +47 -0
  20. docs/free_view_result.gif +3 -0
  21. docs/install.md +39 -0
  22. docs/resize_good.gif +3 -0
  23. docs/resize_no.gif +3 -0
  24. docs/sadtalker_logo.png +0 -0
  25. docs/using_ref_video.gif +3 -0
  26. docs/webui_extension.md +49 -0
  27. examples/driven_audio/RD_Radio31_000.wav +0 -0
  28. examples/driven_audio/RD_Radio34_002.wav +0 -0
  29. examples/driven_audio/RD_Radio36_000.wav +0 -0
  30. examples/driven_audio/RD_Radio40_000.wav +0 -0
  31. examples/driven_audio/bus_chinese.wav +0 -0
  32. examples/driven_audio/chinese_news.wav +3 -0
  33. examples/driven_audio/chinese_poem1.wav +0 -0
  34. examples/driven_audio/chinese_poem2.wav +0 -0
  35. examples/driven_audio/deyu.wav +3 -0
  36. examples/driven_audio/eluosi.wav +3 -0
  37. examples/driven_audio/fayu.wav +3 -0
  38. examples/driven_audio/imagine.wav +3 -0
  39. examples/driven_audio/itosinger1.wav +0 -0
  40. examples/driven_audio/japanese.wav +3 -0
  41. examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4 +3 -0
  42. examples/ref_video/WDA_KatieHill_000.mp4 +3 -0
  43. examples/source_image/art_0.png +0 -0
  44. examples/source_image/art_1.png +0 -0
  45. examples/source_image/art_10.png +0 -0
  46. examples/source_image/art_11.png +0 -0
  47. examples/source_image/art_12.png +0 -0
  48. examples/source_image/art_13.png +0 -0
  49. examples/source_image/art_14.png +0 -0
  50. examples/source_image/art_15.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,30 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs/example_crop.gif filter=lfs diff=lfs merge=lfs -text
37
+ docs/example_crop_still.gif filter=lfs diff=lfs merge=lfs -text
38
+ docs/example_full.gif filter=lfs diff=lfs merge=lfs -text
39
+ docs/example_full_enhanced.gif filter=lfs diff=lfs merge=lfs -text
40
+ docs/free_view_result.gif filter=lfs diff=lfs merge=lfs -text
41
+ docs/resize_good.gif filter=lfs diff=lfs merge=lfs -text
42
+ docs/resize_no.gif filter=lfs diff=lfs merge=lfs -text
43
+ docs/using_ref_video.gif filter=lfs diff=lfs merge=lfs -text
44
+ examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
45
+ examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
46
+ examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
47
+ examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
48
+ examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
49
+ examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
50
+ examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4 filter=lfs diff=lfs merge=lfs -text
51
+ examples/ref_video/WDA_KatieHill_000.mp4 filter=lfs diff=lfs merge=lfs -text
52
+ examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
53
+ examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
54
+ examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
55
+ examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
56
+ examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
57
+ examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
58
+ examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text
59
+ results/2a145f10-368f-4c49-ad9c-d81f26620673/WDA_AlexandriaOcasioCortez_000_full.mp4 filter=lfs diff=lfs merge=lfs -text
60
+ results/3c31eee7-6a33-4251-9f60-0d435d2cedea/WDA_KatieHill_000_enhanced.mp4 filter=lfs diff=lfs merge=lfs -text
61
+ results/43e6d2d2-a0ad-4c24-bc7b-58c9231d2d59/WDA_KatieHill_000_enhanced.mp4 filter=lfs diff=lfs merge=lfs -text
62
+ results/ef1b868f-279e-4981-917b-29e4b6559046/WDA_AlexandriaOcasioCortez_000_full.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ .idea/
161
+
162
+ examples/results/*
163
+ gfpgan/*
164
+ checkpoints/*
165
+ assets/*
166
+ results/*
167
+ Dockerfile
168
+ start_docker.sh
169
+ start.sh
170
+
171
+ checkpoints
172
+
173
+ # Mac
174
+ .DS_Store
LICENSE ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Tencent is pleased to support the open source community by making SadTalker available.
2
+
3
+ Copyright (C), a Tencent company. All rights reserved.
4
+
5
+ SadTalker is licensed under the Apache 2.0 License, except for the third-party components listed below.
6
+
7
+ Terms of the Apache License Version 2.0:
8
+ ---------------------------------------------
9
+ Apache License
10
+ Version 2.0, January 2004
11
+ http://www.apache.org/licenses/
12
+
13
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
14
+
15
+ 1. Definitions.
16
+
17
+ "License" shall mean the terms and conditions for use, reproduction,
18
+ and distribution as defined by Sections 1 through 9 of this document.
19
+
20
+ "Licensor" shall mean the copyright owner or entity authorized by
21
+ the copyright owner that is granting the License.
22
+
23
+ "Legal Entity" shall mean the union of the acting entity and all
24
+ other entities that control, are controlled by, or are under common
25
+ control with that entity. For the purposes of this definition,
26
+ "control" means (i) the power, direct or indirect, to cause the
27
+ direction or management of such entity, whether by contract or
28
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
29
+ outstanding shares, or (iii) beneficial ownership of such entity.
30
+
31
+ "You" (or "Your") shall mean an individual or Legal Entity
32
+ exercising permissions granted by this License.
33
+
34
+ "Source" form shall mean the preferred form for making modifications,
35
+ including but not limited to software source code, documentation
36
+ source, and configuration files.
37
+
38
+ "Object" form shall mean any form resulting from mechanical
39
+ transformation or translation of a Source form, including but
40
+ not limited to compiled object code, generated documentation,
41
+ and conversions to other media types.
42
+
43
+ "Work" shall mean the work of authorship, whether in Source or
44
+ Object form, made available under the License, as indicated by a
45
+ copyright notice that is included in or attached to the work
46
+ (an example is provided in the Appendix below).
47
+
48
+ "Derivative Works" shall mean any work, whether in Source or Object
49
+ form, that is based on (or derived from) the Work and for which the
50
+ editorial revisions, annotations, elaborations, or other modifications
51
+ represent, as a whole, an original work of authorship. For the purposes
52
+ of this License, Derivative Works shall not include works that remain
53
+ separable from, or merely link (or bind by name) to the interfaces of,
54
+ the Work and Derivative Works thereof.
55
+
56
+ "Contribution" shall mean any work of authorship, including
57
+ the original version of the Work and any modifications or additions
58
+ to that Work or Derivative Works thereof, that is intentionally
59
+ submitted to Licensor for inclusion in the Work by the copyright owner
60
+ or by an individual or Legal Entity authorized to submit on behalf of
61
+ the copyright owner. For the purposes of this definition, "submitted"
62
+ means any form of electronic, verbal, or written communication sent
63
+ to the Licensor or its representatives, including but not limited to
64
+ communication on electronic mailing lists, source code control systems,
65
+ and issue tracking systems that are managed by, or on behalf of, the
66
+ Licensor for the purpose of discussing and improving the Work, but
67
+ excluding communication that is conspicuously marked or otherwise
68
+ designated in writing by the copyright owner as "Not a Contribution."
69
+
70
+ "Contributor" shall mean Licensor and any individual or Legal Entity
71
+ on behalf of whom a Contribution has been received by Licensor and
72
+ subsequently incorporated within the Work.
73
+
74
+ 2. Grant of Copyright License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ copyright license to reproduce, prepare Derivative Works of,
78
+ publicly display, publicly perform, sublicense, and distribute the
79
+ Work and such Derivative Works in Source or Object form.
80
+
81
+ 3. Grant of Patent License. Subject to the terms and conditions of
82
+ this License, each Contributor hereby grants to You a perpetual,
83
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
84
+ (except as stated in this section) patent license to make, have made,
85
+ use, offer to sell, sell, import, and otherwise transfer the Work,
86
+ where such license applies only to those patent claims licensable
87
+ by such Contributor that are necessarily infringed by their
88
+ Contribution(s) alone or by combination of their Contribution(s)
89
+ with the Work to which such Contribution(s) was submitted. If You
90
+ institute patent litigation against any entity (including a
91
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
92
+ or a Contribution incorporated within the Work constitutes direct
93
+ or contributory patent infringement, then any patent licenses
94
+ granted to You under this License for that Work shall terminate
95
+ as of the date such litigation is filed.
96
+
97
+ 4. Redistribution. You may reproduce and distribute copies of the
98
+ Work or Derivative Works thereof in any medium, with or without
99
+ modifications, and in Source or Object form, provided that You
100
+ meet the following conditions:
101
+
102
+ (a) You must give any other recipients of the Work or
103
+ Derivative Works a copy of this License; and
104
+
105
+ (b) You must cause any modified files to carry prominent notices
106
+ stating that You changed the files; and
107
+
108
+ (c) You must retain, in the Source form of any Derivative Works
109
+ that You distribute, all copyright, patent, trademark, and
110
+ attribution notices from the Source form of the Work,
111
+ excluding those notices that do not pertain to any part of
112
+ the Derivative Works; and
113
+
114
+ (d) If the Work includes a "NOTICE" text file as part of its
115
+ distribution, then any Derivative Works that You distribute must
116
+ include a readable copy of the attribution notices contained
117
+ within such NOTICE file, excluding those notices that do not
118
+ pertain to any part of the Derivative Works, in at least one
119
+ of the following places: within a NOTICE text file distributed
120
+ as part of the Derivative Works; within the Source form or
121
+ documentation, if provided along with the Derivative Works; or,
122
+ within a display generated by the Derivative Works, if and
123
+ wherever such third-party notices normally appear. The contents
124
+ of the NOTICE file are for informational purposes only and
125
+ do not modify the License. You may add Your own attribution
126
+ notices within Derivative Works that You distribute, alongside
127
+ or as an addendum to the NOTICE text from the Work, provided
128
+ that such additional attribution notices cannot be construed
129
+ as modifying the License.
130
+
131
+ You may add Your own copyright statement to Your modifications and
132
+ may provide additional or different license terms and conditions
133
+ for use, reproduction, or distribution of Your modifications, or
134
+ for any such Derivative Works as a whole, provided Your use,
135
+ reproduction, and distribution of the Work otherwise complies with
136
+ the conditions stated in this License.
137
+
138
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
139
+ any Contribution intentionally submitted for inclusion in the Work
140
+ by You to the Licensor shall be under the terms and conditions of
141
+ this License, without any additional terms or conditions.
142
+ Notwithstanding the above, nothing herein shall supersede or modify
143
+ the terms of any separate license agreement you may have executed
144
+ with Licensor regarding such Contributions.
145
+
146
+ 6. Trademarks. This License does not grant permission to use the trade
147
+ names, trademarks, service marks, or product names of the Licensor,
148
+ except as required for reasonable and customary use in describing the
149
+ origin of the Work and reproducing the content of the NOTICE file.
150
+
151
+ 7. Disclaimer of Warranty. Unless required by applicable law or
152
+ agreed to in writing, Licensor provides the Work (and each
153
+ Contributor provides its Contributions) on an "AS IS" BASIS,
154
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
155
+ implied, including, without limitation, any warranties or conditions
156
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
157
+ PARTICULAR PURPOSE. You are solely responsible for determining the
158
+ appropriateness of using or redistributing the Work and assume any
159
+ risks associated with Your exercise of permissions under this License.
160
+
161
+ 8. Limitation of Liability. In no event and under no legal theory,
162
+ whether in tort (including negligence), contract, or otherwise,
163
+ unless required by applicable law (such as deliberate and grossly
164
+ negligent acts) or agreed to in writing, shall any Contributor be
165
+ liable to You for damages, including any direct, indirect, special,
166
+ incidental, or consequential damages of any character arising as a
167
+ result of this License or out of the use or inability to use the
168
+ Work (including but not limited to damages for loss of goodwill,
169
+ work stoppage, computer failure or malfunction, or any and all
170
+ other commercial damages or losses), even if such Contributor
171
+ has been advised of the possibility of such damages.
172
+
173
+ 9. Accepting Warranty or Additional Liability. While redistributing
174
+ the Work or Derivative Works thereof, You may choose to offer,
175
+ and charge a fee for, acceptance of support, warranty, indemnity,
176
+ or other liability obligations and/or rights consistent with this
177
+ License. However, in accepting such obligations, You may act only
178
+ on Your own behalf and on Your sole responsibility, not on behalf
179
+ of any other Contributor, and only if You agree to indemnify,
180
+ defend, and hold each Contributor harmless for any liability
181
+ incurred by, or claims asserted against, such Contributor by reason
182
+ of your accepting any such warranty or additional liability.
183
+
184
+ END OF TERMS AND CONDITIONS
185
+
186
+ APPENDIX: How to apply the Apache License to your work.
187
+
188
+ To apply the Apache License to your work, attach the following
189
+ boilerplate notice, with the fields enclosed by brackets "[]"
190
+ replaced with your own identifying information. (Don't include
191
+ the brackets!) The text should be enclosed in the appropriate
192
+ comment syntax for the file format. We also recommend that a
193
+ file or class name and description of purpose be included on the
194
+ same "printed page" as the copyright notice for easier
195
+ identification within third-party archives.
196
+
197
+ Copyright [yyyy] [name of copyright owner]
198
+
199
+ Licensed under the Apache License, Version 2.0 (the "License");
200
+ you may not use this file except in compliance with the License.
201
+ You may obtain a copy of the License at
202
+
203
+ http://www.apache.org/licenses/LICENSE-2.0
204
+
205
+ Unless required by applicable law or agreed to in writing, software
206
+ distributed under the License is distributed on an "AS IS" BASIS,
207
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
208
+ See the License for the specific language governing permissions and
209
+ limitations under the License.
README.md CHANGED
@@ -1,12 +1,304 @@
1
  ---
2
- title: NamastexLabs SadTalker
3
- emoji: 😻
4
- colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 3.47.1
8
- app_file: app.py
9
- pinned: false
10
  ---
 
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: NamastexLabs_SadTalker
3
+ app_file: app_sadtalker.py
 
 
4
  sdk: gradio
5
  sdk_version: 3.47.1
 
 
6
  ---
7
+ <div align="center">
8
 
9
+ <img src='https://user-images.githubusercontent.com/4397546/229094115-862c747e-7397-4b54-ba4a-bd368bfe2e0f.png' width='500px'/>
10
+
11
+
12
+ <!--<h2> 😭 SadTalker: <span style="font-size:12px">Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation </span> </h2> -->
13
+
14
+ <a href='https://arxiv.org/abs/2211.12194'><img src='https://img.shields.io/badge/ArXiv-PDF-red'></a> &nbsp; <a href='https://sadtalker.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a> &nbsp; [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb) &nbsp; [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/vinthony/SadTalker) &nbsp; [![sd webui-colab](https://img.shields.io/badge/Automatic1111-Colab-green)](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb) &nbsp; <br> [![Replicate](https://replicate.com/cjwbw/sadtalker/badge)](https://replicate.com/cjwbw/sadtalker) [![Discord](https://dcbadge.vercel.app/api/server/rrayYqZ4tf?style=flat)](https://discord.gg/rrayYqZ4tf)
15
+
16
+ <div>
17
+ <a target='_blank'>Wenxuan Zhang <sup>*,1,2</sup> </a>&emsp;
18
+ <a href='https://vinthony.github.io/' target='_blank'>Xiaodong Cun <sup>*,2</a>&emsp;
19
+ <a href='https://xuanwangvc.github.io/' target='_blank'>Xuan Wang <sup>3</sup></a>&emsp;
20
+ <a href='https://yzhang2016.github.io/' target='_blank'>Yong Zhang <sup>2</sup></a>&emsp;
21
+ <a href='https://xishen0220.github.io/' target='_blank'>Xi Shen <sup>2</sup></a>&emsp; </br>
22
+ <a href='https://yuguo-xjtu.github.io/' target='_blank'>Yu Guo<sup>1</sup> </a>&emsp;
23
+ <a href='https://scholar.google.com/citations?hl=zh-CN&user=4oXBp9UAAAAJ' target='_blank'>Ying Shan <sup>2</sup> </a>&emsp;
24
+ <a target='_blank'>Fei Wang <sup>1</sup> </a>&emsp;
25
+ </div>
26
+ <br>
27
+ <div>
28
+ <sup>1</sup> Xi'an Jiaotong University &emsp; <sup>2</sup> Tencent AI Lab &emsp; <sup>3</sup> Ant Group &emsp;
29
+ </div>
30
+ <br>
31
+ <i><strong><a href='https://arxiv.org/abs/2211.12194' target='_blank'>CVPR 2023</a></strong></i>
32
+ <br>
33
+ <br>
34
+
35
+
36
+ ![sadtalker](https://user-images.githubusercontent.com/4397546/222490039-b1f6156b-bf00-405b-9fda-0c9a9156f991.gif)
37
+
38
+ <b>TL;DR: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; single portrait image 🙎‍♂️ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; audio 🎤 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; = &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; talking head video 🎞.</b>
39
+
40
+ <br>
41
+
42
+ </div>
43
+
44
+
45
+
46
+ ## Highlights
47
+
48
+ - The license has been updated to Apache 2.0, and we've removed the non-commercial restriction
49
+ - **SadTalker has now officially been integrated into Discord, where you can use it for free by sending files. You can also generate high-quality videos from text prompts. Join: [![Discord](https://dcbadge.vercel.app/api/server/rrayYqZ4tf?style=flat)](https://discord.gg/rrayYqZ4tf)**
50
+
51
+ - We've published a [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) extension. Check out more details [here](docs/webui_extension.md). [Demo Video](https://user-images.githubusercontent.com/4397546/231495639-5d4bb925-ea64-4a36-a519-6389917dac29.mp4)
52
+
53
+ - Full image mode is now available! [More details...](https://github.com/OpenTalker/SadTalker#full-bodyimage-generation)
54
+
55
+ | still+enhancer in v0.0.1 | still + enhancer in v0.0.2 | [input image @bagbag1815](https://twitter.com/bagbag1815/status/1642754319094108161) |
56
+ |:--------------------: |:--------------------: | :----: |
57
+ | <video src="https://user-images.githubusercontent.com/48216707/229484996-5d7be64f-2553-4c9e-a452-c5cf0b8ebafe.mp4" type="video/mp4"> </video> | <video src="https://user-images.githubusercontent.com/4397546/230717873-355b7bf3-d3de-49f9-a439-9220e623fce7.mp4" type="video/mp4"> </video> | <img src='./examples/source_image/full_body_2.png' width='380'>
58
+
59
+ - Several new modes (Still, reference, and resize modes) are now available!
60
+
61
+ - We're happy to see more community demos on [bilibili](https://search.bilibili.com/all?keyword=sadtalker), [YouTube](https://www.youtube.com/results?search_query=sadtalker) and [X (#sadtalker)](https://twitter.com/search?q=%23sadtalker&src).
62
+
63
+ ## Changelog
64
+
65
+ The previous changelog can be found [here](docs/changlelog.md).
66
+
67
+ - __[2023.06.12]__: Added more new features in WebUI extension, see the discussion [here](https://github.com/OpenTalker/SadTalker/discussions/386).
68
+
69
+ - __[2023.06.05]__: Released a new 512x512px (beta) face model. Fixed some bugs and improved the performance.
70
+
71
+ - __[2023.04.15]__: Added a WebUI Colab notebook by [@camenduru](https://github.com/camenduru/): [![sd webui-colab](https://img.shields.io/badge/Automatic1111-Colab-green)](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb)
72
+
73
+ - __[2023.04.12]__: Added a more detailed WebUI installation document and fixed a problem when reinstalling.
74
+
75
+ - __[2023.04.12]__: Fixed the WebUI safety issues because of 3rd-party packages, and optimized the output path in `sd-webui-extension`.
76
+
77
+ - __[2023.04.08]__: In v0.0.2, we added a logo watermark to the generated video to prevent abuse. _This watermark has since been removed in a later release._
78
+
79
+ - __[2023.04.08]__: In v0.0.2, we added features for full image animation and a link to download checkpoints from Baidu. We also optimized the enhancer logic.
80
+
81
+ ## To-Do
82
+
83
+ We're tracking new updates in [issue #280](https://github.com/OpenTalker/SadTalker/issues/280).
84
+
85
+ ## Troubleshooting
86
+
87
+ If you have any problems, please read our [FAQs](docs/FAQ.md) before opening an issue.
88
+
89
+
90
+
91
+ ## 1. Installation.
92
+
93
+ Community tutorials: [中文Windows教程 (Chinese Windows tutorial)](https://www.bilibili.com/video/BV1Dc411W7V6/) | [日本語コース (Japanese tutorial)](https://br-d.fanbox.cc/posts/5685086).
94
+
95
+ ### Linux/Unix
96
+
97
+ 1. Install [Anaconda](https://www.anaconda.com/), Python and `git`.
98
+
99
+ 2. Create the env and install the requirements.
100
+ ```bash
101
+ git clone https://github.com/OpenTalker/SadTalker.git
102
+
103
+ cd SadTalker
104
+
105
+ conda create -n sadtalker python=3.8
106
+
107
+ conda activate sadtalker
108
+
109
+ pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
110
+
111
+ conda install ffmpeg
112
+
113
+ pip install -r requirements.txt
114
+
115
+ ### Coqui TTS is optional for gradio demo.
116
+ ### pip install TTS
117
+
118
+ ```
119
+ ### Windows
120
+
121
+ A video tutorial in Chinese is available [here](https://www.bilibili.com/video/BV1Dc411W7V6/). You can also follow the instructions below:
122
+
123
+ 1. Install [Python 3.8](https://www.python.org/downloads/windows/) and check "Add Python to PATH".
124
+ 2. Install [git](https://git-scm.com/download/win) manually or using [Scoop](https://scoop.sh/): `scoop install git`.
125
+ 3. Install `ffmpeg`, following [this tutorial](https://www.wikihow.com/Install-FFmpeg-on-Windows) or using [scoop](https://scoop.sh/): `scoop install ffmpeg`.
126
+ 4. Download the SadTalker repository by running `git clone https://github.com/Winfredy/SadTalker.git`.
127
+ 5. Download the checkpoints and gfpgan models in the [downloads section](#2-download-models).
128
+ 6. Run `start.bat` from Windows Explorer as normal, non-administrator, user, and a Gradio-powered WebUI demo will be started.
129
+
130
+ ### macOS
131
+
132
+ A tutorial on installing SadTalker on macOS can be found [here](docs/install.md).
133
+
134
+ ### Docker, WSL, etc
135
+
136
+ Please check out additional tutorials [here](docs/install.md).
137
+
138
+ ## 2. Download Models
139
+
140
+ You can run the following script on Linux/macOS to automatically download all the models:
141
+
142
+ ```bash
143
+ bash scripts/download_models.sh
144
+ ```
145
+
146
+ We also provide an offline patch (`gfpgan/`), so no model will be downloaded when generating.
147
+
148
+ ### Pre-Trained Models
149
+
150
+ * [Google Drive](https://drive.google.com/file/d/1gwWh45pF7aelNP_P78uDJL8Sycep-K7j/view?usp=sharing)
151
+ * [GitHub Releases](https://github.com/OpenTalker/SadTalker/releases)
152
+ * [Baidu (百度云盘)](https://pan.baidu.com/s/1kb1BCPaLOWX1JJb9Czbn6w?pwd=sadt) (Password: `sadt`)
153
+
154
+ <!-- TODO add Hugging Face links -->
155
+
156
+ ### GFPGAN Offline Patch
157
+
158
+ * [Google Drive](https://drive.google.com/file/d/19AIBsmfcHW6BRJmeqSFlG5fL445Xmsyi?usp=sharing)
159
+ * [GitHub Releases](https://github.com/OpenTalker/SadTalker/releases)
160
+ * [Baidu (百度云盘)](https://pan.baidu.com/s/1P4fRgk9gaSutZnn8YW034Q?pwd=sadt) (Password: `sadt`)
161
+
162
+ <!-- TODO add Hugging Face links -->
163
+
164
+
165
+ <details><summary>Model Details</summary>
166
+
167
+
168
+ Model descriptions:
169
+
170
+ ##### New version
171
+ | Model | Description
172
+ | :--- | :----------
173
+ |checkpoints/mapping_00229-model.pth.tar | Pre-trained MappingNet in Sadtalker.
174
+ |checkpoints/mapping_00109-model.pth.tar | Pre-trained MappingNet in Sadtalker.
175
+ |checkpoints/SadTalker_V0.0.2_256.safetensors | packaged sadtalker checkpoints of old version, 256 face render).
176
+ |checkpoints/SadTalker_V0.0.2_512.safetensors | packaged sadtalker checkpoints of old version, 512 face render).
177
+ |gfpgan/weights | Face detection and enhanced models used in `facexlib` and `gfpgan`.
178
+
179
+
180
+ ##### Old version
181
+ | Model | Description
182
+ | :--- | :----------
183
+ |checkpoints/auido2exp_00300-model.pth | Pre-trained ExpNet in Sadtalker.
184
+ |checkpoints/auido2pose_00140-model.pth | Pre-trained PoseVAE in Sadtalker.
185
+ |checkpoints/mapping_00229-model.pth.tar | Pre-trained MappingNet in Sadtalker.
186
+ |checkpoints/mapping_00109-model.pth.tar | Pre-trained MappingNet in Sadtalker.
187
+ |checkpoints/facevid2vid_00189-model.pth.tar | Pre-trained face-vid2vid model from [the reappearance of face-vid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis).
188
+ |checkpoints/epoch_20.pth | Pre-trained 3DMM extractor in [Deep3DFaceReconstruction](https://github.com/microsoft/Deep3DFaceReconstruction).
189
+ |checkpoints/wav2lip.pth | Highly accurate lip-sync model in [Wav2lip](https://github.com/Rudrabha/Wav2Lip).
190
+ |checkpoints/shape_predictor_68_face_landmarks.dat | Face landmark model used in [dilb](http://dlib.net/).
191
+ |checkpoints/BFM | 3DMM library file.
192
+ |checkpoints/hub | Face detection models used in [face alignment](https://github.com/1adrianb/face-alignment).
193
+ |gfpgan/weights | Face detection and enhanced models used in `facexlib` and `gfpgan`.
194
+
195
+ The final folder will be shown as:
196
+
197
+ <img width="331" alt="image" src="https://user-images.githubusercontent.com/4397546/232511411-4ca75cbf-a434-48c5-9ae0-9009e8316484.png">
198
+
199
+
200
+ </details>
201
+
202
+ ## 3. Quick Start
203
+
204
+ Please read our document on [best practices and configuration tips](docs/best_practice.md)
205
+
206
+ ### WebUI Demos
207
+
208
+ **Online Demo**: [HuggingFace](https://huggingface.co/spaces/vinthony/SadTalker) | [SDWebUI-Colab](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb) | [Colab](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb)
209
+
210
+ **Local WebUI extension**: Please refer to [WebUI docs](docs/webui_extension.md).
211
+
212
+ **Local gradio demo (recommanded)**: A Gradio instance similar to our [Hugging Face demo](https://huggingface.co/spaces/vinthony/SadTalker) can be run locally:
213
+
214
+ ```bash
215
+ ## you need manually install TTS(https://github.com/coqui-ai/TTS) via `pip install tts` in advanced.
216
+ python app_sadtalker.py
217
+ ```
218
+
219
+ You can also start it more easily:
220
+
221
+ - windows: just double click `webui.bat`, the requirements will be installed automatically.
222
+ - Linux/Mac OS: run `bash webui.sh` to start the webui.
223
+
224
+
225
+ ### CLI usage
226
+
227
+ ##### Animating a portrait image from default config:
228
+ ```bash
229
+ python inference.py --driven_audio <audio.wav> \
230
+ --source_image <video.mp4 or picture.png> \
231
+ --enhancer gfpgan
232
+ ```
233
+ The results will be saved in `results/$SOME_TIMESTAMP/*.mp4`.
234
+
235
+ ##### Full body/image Generation:
236
+
237
+ Using `--still` to generate a natural full body video. You can add `enhancer` to improve the quality of the generated video.
238
+
239
+ ```bash
240
+ python inference.py --driven_audio <audio.wav> \
241
+ --source_image <video.mp4 or picture.png> \
242
+ --result_dir <a file to store results> \
243
+ --still \
244
+ --preprocess full \
245
+ --enhancer gfpgan
246
+ ```
247
+
248
+ More examples and configuration and tips can be founded in the [ >>> best practice documents <<<](docs/best_practice.md).
249
+
250
+ ## Citation
251
+
252
+ If you find our work useful in your research, please consider citing:
253
+
254
+ ```bibtex
255
+ @article{zhang2022sadtalker,
256
+ title={SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation},
257
+ author={Zhang, Wenxuan and Cun, Xiaodong and Wang, Xuan and Zhang, Yong and Shen, Xi and Guo, Yu and Shan, Ying and Wang, Fei},
258
+ journal={arXiv preprint arXiv:2211.12194},
259
+ year={2022}
260
+ }
261
+ ```
262
+
263
+ ## Acknowledgements
264
+
265
+ Facerender code borrows heavily from [zhanglonghao's reproduction of face-vid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis) and [PIRender](https://github.com/RenYurui/PIRender). We thank the authors for sharing their wonderful code. In training process, we also used the model from [Deep3DFaceReconstruction](https://github.com/microsoft/Deep3DFaceReconstruction) and [Wav2lip](https://github.com/Rudrabha/Wav2Lip). We thank for their wonderful work.
266
+
267
+ We also use the following 3rd-party libraries:
268
+
269
+ - **Face Utils**: https://github.com/xinntao/facexlib
270
+ - **Face Enhancement**: https://github.com/TencentARC/GFPGAN
271
+ - **Image/Video Enhancement**:https://github.com/xinntao/Real-ESRGAN
272
+
273
+ ## Extensions:
274
+
275
+ - [SadTalker-Video-Lip-Sync](https://github.com/Zz-ww/SadTalker-Video-Lip-Sync) from [@Zz-ww](https://github.com/Zz-ww): SadTalker for Video Lip Editing
276
+
277
+ ## Related Works
278
+ - [StyleHEAT: One-Shot High-Resolution Editable Talking Face Generation via Pre-trained StyleGAN (ECCV 2022)](https://github.com/FeiiYin/StyleHEAT)
279
+ - [CodeTalker: Speech-Driven 3D Facial Animation with Discrete Motion Prior (CVPR 2023)](https://github.com/Doubiiu/CodeTalker)
280
+ - [VideoReTalking: Audio-based Lip Synchronization for Talking Head Video Editing In the Wild (SIGGRAPH Asia 2022)](https://github.com/vinthony/video-retalking)
281
+ - [DPE: Disentanglement of Pose and Expression for General Video Portrait Editing (CVPR 2023)](https://github.com/Carlyx/DPE)
282
+ - [3D GAN Inversion with Facial Symmetry Prior (CVPR 2023)](https://github.com/FeiiYin/SPI/)
283
+ - [T2M-GPT: Generating Human Motion from Textual Descriptions with Discrete Representations (CVPR 2023)](https://github.com/Mael-zys/T2M-GPT)
284
+
285
+ ## Disclaimer
286
+
287
+ This is not an official product of Tencent.
288
+
289
+ ```
290
+ 1. Please carefully read and comply with the open-source license applicable to this code before using it.
291
+ 2. Please carefully read and comply with the intellectual property declaration applicable to this code before using it.
292
+ 3. This open-source code runs completely offline and does not collect any personal information or other data. If you use this code to provide services to end-users and collect related data, please take necessary compliance measures according to applicable laws and regulations (such as publishing privacy policies, adopting necessary data security strategies, etc.). If the collected data involves personal information, user consent must be obtained (if applicable). Any legal liabilities arising from this are unrelated to Tencent.
293
+ 4. Without Tencent's written permission, you are not authorized to use the names or logos legally owned by Tencent, such as "Tencent." Otherwise, you may be liable for legal responsibilities.
294
+ 5. This open-source code does not have the ability to directly provide services to end-users. If you need to use this code for further model training or demos, as part of your product to provide services to end-users, or for similar use, please comply with applicable laws and regulations for your product or service. Any legal liabilities arising from this are unrelated to Tencent.
295
+ 6. It is prohibited to use this open-source code for activities that harm the legitimate rights and interests of others (including but not limited to fraud, deception, infringement of others' portrait rights, reputation rights, etc.), or other behaviors that violate applicable laws and regulations or go against social ethics and good customs (including providing incorrect or false information, spreading pornographic, terrorist, and violent information, etc.). Otherwise, you may be liable for legal responsibilities.
296
+ ```
297
+
298
+ LOGO: color and font suggestion: [ChatGPT](https://chat.openai.com), logo font: [Montserrat Alternates
299
+ ](https://fonts.google.com/specimen/Montserrat+Alternates?preview.text=SadTalker&preview.text_type=custom&query=mont).
300
+
301
+ All the copyrights of the demo images and audio are from community users or the generation from stable diffusion. Feel free to contact us if you would like use to remove them.
302
+
303
+
304
+ <!-- Spelling fixed on Tuesday, September 12, 2023 by @fakerybakery (https://github.com/fakerybakery). These changes are licensed under the Apache 2.0 license. -->
app_sadtalker.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+ from src.gradio_demo import SadTalker
4
+
5
+
6
+ try:
7
+ import webui # in webui
8
+ in_webui = True
9
+ except:
10
+ in_webui = False
11
+
12
+
13
+ def toggle_audio_file(choice):
14
+ if choice == False:
15
+ return gr.update(visible=True), gr.update(visible=False)
16
+ else:
17
+ return gr.update(visible=False), gr.update(visible=True)
18
+
19
+ def ref_video_fn(path_of_ref_video):
20
+ if path_of_ref_video is not None:
21
+ return gr.update(value=True)
22
+ else:
23
+ return gr.update(value=False)
24
+
25
+ def sadtalker_demo(checkpoint_path='checkpoints', config_path='src/config', warpfn=None):
26
+
27
+ sad_talker = SadTalker(checkpoint_path, config_path, lazy_load=True)
28
+
29
+ with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
30
+ gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
31
+ <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
32
+ <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
33
+ <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
34
+
35
+ with gr.Row().style(equal_height=False):
36
+ with gr.Column(variant='panel'):
37
+ with gr.Tabs(elem_id="sadtalker_source_image"):
38
+ with gr.TabItem('Upload image'):
39
+ with gr.Row():
40
+ source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
41
+
42
+ with gr.Tabs(elem_id="sadtalker_driven_audio"):
43
+ with gr.TabItem('Upload OR TTS'):
44
+ with gr.Column(variant='panel'):
45
+ driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
46
+
47
+ if sys.platform != 'win32' and not in_webui:
48
+ from src.utils.text2speech import TTSTalker
49
+ tts_talker = TTSTalker()
50
+ with gr.Column(variant='panel'):
51
+ input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
52
+ tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
53
+ tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
54
+
55
+ with gr.Column(variant='panel'):
56
+ with gr.Tabs(elem_id="sadtalker_checkbox"):
57
+ with gr.TabItem('Settings'):
58
+ gr.Markdown("need help? please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more detials")
59
+ with gr.Column(variant='panel'):
60
+ # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
61
+ # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
62
+ pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) #
63
+ size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
64
+ preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
65
+ is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
66
+ batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2)
67
+ enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
68
+ submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
69
+
70
+ with gr.Tabs(elem_id="sadtalker_genearted"):
71
+ gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
72
+
73
+ if warpfn:
74
+ submit.click(
75
+ fn=warpfn(sad_talker.test),
76
+ inputs=[source_image,
77
+ driven_audio,
78
+ preprocess_type,
79
+ is_still_mode,
80
+ enhancer,
81
+ batch_size,
82
+ size_of_image,
83
+ pose_style
84
+ ],
85
+ outputs=[gen_video]
86
+ )
87
+ else:
88
+ submit.click(
89
+ fn=sad_talker.test,
90
+ inputs=[source_image,
91
+ driven_audio,
92
+ preprocess_type,
93
+ is_still_mode,
94
+ enhancer,
95
+ batch_size,
96
+ size_of_image,
97
+ pose_style
98
+ ],
99
+ outputs=[gen_video]
100
+ )
101
+
102
+ return sadtalker_interface
103
+
104
+
105
+ if __name__ == "__main__":
106
+ demo = sadtalker_demo()
107
+ demo.queue()
108
+ demo.launch(share=True)
109
+ demo.deploy(name="NamastexLabs_SadTalker")
checkpoints/SadTalker_V0.0.2_256.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c211f5d6de003516bf1bbda9f47049a4c9c99133b1ab565c6961e5af16477bff
3
+ size 725066984
checkpoints/SadTalker_V0.0.2_512.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e063f7ff5258240bdb0f7690783a7b1374e6a4a81ce8fa33456f4cd49694340
3
+ size 725066984
checkpoints/mapping_00109-model.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84a8642468a3fcfdd9ab6be955267043116c2bec2284686a5262f1eaf017f64c
3
+ size 155779231
checkpoints/mapping_00229-model.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
3
+ size 155521183
cog.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ build:
2
+ gpu: true
3
+ cuda: "11.3"
4
+ python_version: "3.8"
5
+ system_packages:
6
+ - "ffmpeg"
7
+ - "libgl1-mesa-glx"
8
+ - "libglib2.0-0"
9
+ python_packages:
10
+ - "torch==1.12.1"
11
+ - "torchvision==0.13.1"
12
+ - "torchaudio==0.12.1"
13
+ - "joblib==1.1.0"
14
+ - "scikit-image==0.19.3"
15
+ - "basicsr==1.4.2"
16
+ - "facexlib==0.3.0"
17
+ - "resampy==0.3.1"
18
+ - "pydub==0.25.1"
19
+ - "scipy==1.10.1"
20
+ - "kornia==0.6.8"
21
+ - "face_alignment==1.3.5"
22
+ - "imageio==2.19.3"
23
+ - "imageio-ffmpeg==0.4.7"
24
+ - "librosa==0.9.2" #
25
+ - "tqdm==4.65.0"
26
+ - "yacs==0.1.8"
27
+ - "gfpgan==1.3.8"
28
+ - "dlib-bin==19.24.1"
29
+ - "av==10.0.0"
30
+ - "trimesh==3.9.20"
31
+ run:
32
+ - mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/s3fd-619a316812.pth" "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
33
+ - mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/2DFAN4-cd938726ad.zip" "https://www.adrianbulat.com/downloads/python-fan/2DFAN4-cd938726ad.zip"
34
+
35
+ predict: "predict.py:Predictor"
docs/FAQ.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Frequency Asked Question
3
+
4
+ **Q: `ffmpeg` is not recognized as an internal or external command**
5
+
6
+ In Linux, you can install the ffmpeg via `conda install ffmpeg`. Or on Mac OS X, try to install ffmpeg via `brew install ffmpeg`. On windows, make sure you have `ffmpeg` in the `%PATH%` as suggested in [#54](https://github.com/Winfredy/SadTalker/issues/54), then, following [this](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) installation to install `ffmpeg`.
7
+
8
+ **Q: Running Requirments.**
9
+
10
+ Please refer to the discussion here: https://github.com/Winfredy/SadTalker/issues/124#issuecomment-1508113989
11
+
12
+
13
+ **Q: ModuleNotFoundError: No module named 'ai'**
14
+
15
+ please check the checkpoint's size of the `epoch_20.pth`. (https://github.com/Winfredy/SadTalker/issues/167, https://github.com/Winfredy/SadTalker/issues/113)
16
+
17
+ **Q: Illegal Hardware Error: Mac M1**
18
+
19
+ please reinstall the `dlib` by `pip install dlib` individually. (https://github.com/Winfredy/SadTalker/issues/129, https://github.com/Winfredy/SadTalker/issues/109)
20
+
21
+
22
+ **Q: FileNotFoundError: [Errno 2] No such file or directory: checkpoints\BFM_Fitting\similarity_Lm3D_all.mat**
23
+
24
+ Make sure you have downloaded the checkpoints and gfpgan as [here](https://github.com/Winfredy/SadTalker#-2-download-trained-models) and placed them in the right place.
25
+
26
+ **Q: RuntimeError: unexpected EOF, expected 237192 more bytes. The file might be corrupted.**
27
+
28
+ The files are not automatically downloaded. Please update the code and download the gfpgan folders as [here](https://github.com/Winfredy/SadTalker#-2-download-trained-models).
29
+
30
+ **Q: CUDA out of memory error**
31
+
32
+ please refer to https://stackoverflow.com/questions/73747731/runtimeerror-cuda-out-of-memory-how-setting-max-split-size-mb
33
+
34
+ ```
35
+ # windows
36
+ set PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
37
+ python inference.py ...
38
+
39
+ # linux
40
+ export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
41
+ python inference.py ...
42
+ ```
43
+
44
+ **Q: Error while decoding stream #0:0: Invalid data found when processing input [mp3float @ 0000015037628c00] Header missing**
45
+
46
+ Our method only support wav or mp3 files as input, please make sure the feeded audios are in these formats.
docs/best_practice.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Best Practices and Tips for configuration
2
+
3
+ > Our model only works on REAL people or the portrait image similar to REAL person. The anime talking head genreation method will be released in future.
4
+
5
+ Advanced confiuration options for `inference.py`:
6
+
7
+ | Name | Configuration | default | Explaination |
8
+ |:------------- |:------------- |:----- | :------------- |
9
+ | Enhance Mode | `--enhancer` | None | Using `gfpgan` or `RestoreFormer` to enhance the generated face via face restoration network
10
+ | Background Enhancer | `--background_enhancer` | None | Using `realesrgan` to enhance the full video.
11
+ | Still Mode | ` --still` | False | Using the same pose parameters as the original image, fewer head motion.
12
+ | Expressive Mode | `--expression_scale` | 1.0 | a larger value will make the expression motion stronger.
13
+ | save path | `--result_dir` |`./results` | The file will be save in the newer location.
14
+ | preprocess | `--preprocess` | `crop` | Run and produce the results in the croped input image. Other choices: `resize`, where the images will be resized to the specific resolution. `full` Run the full image animation, use with `--still` to get better results.
15
+ | ref Mode (eye) | `--ref_eyeblink` | None | A video path, where we borrow the eyeblink from this reference video to provide more natural eyebrow movement.
16
+ | ref Mode (pose) | `--ref_pose` | None | A video path, where we borrow the pose from the head reference video.
17
+ | 3D Mode | `--face3dvis` | False | Need additional installation. More details to generate the 3d face can be founded [here](docs/face3d.md).
18
+ | free-view Mode | `--input_yaw`,<br> `--input_pitch`,<br> `--input_roll` | None | Genearting novel view or free-view 4D talking head from a single image. More details can be founded [here](https://github.com/Winfredy/SadTalker#generating-4d-free-view-talking-examples-from-audio-and-a-single-image).
19
+
20
+
21
+ ### About `--preprocess`
22
+
23
+ Our system automatically handles the input images via `crop`, `resize` and `full`.
24
+
25
+ In `crop` mode, we only generate the croped image via the facial keypoints and generated the facial anime avator. The animation of both expression and head pose are realistic.
26
+
27
+ > Still mode will stop the eyeblink and head pose movement.
28
+
29
+ | [input image @bagbag1815](https://twitter.com/bagbag1815/status/1642754319094108161) | crop | crop w/still |
30
+ |:--------------------: |:--------------------: | :----: |
31
+ | <img src='../examples/source_image/full_body_2.png' width='380'> | ![full_body_2](example_crop.gif) | ![full_body_2](example_crop_still.gif) |
32
+
33
+
34
+ In `resize` mode, we resize the whole images to generate the fully talking head video. Thus, an image similar to the ID photo can be produced. ⚠️ It will produce bad results for full person images.
35
+
36
+
37
+
38
+
39
+ | <img src='../examples/source_image/full_body_2.png' width='380'> | <img src='../examples/source_image/full4.jpeg' width='380'> |
40
+ |:--------------------: |:--------------------: |
41
+ | ❌ not suitable for resize mode | ✅ good for resize mode |
42
+ | <img src='resize_no.gif'> | <img src='resize_good.gif' width='380'> |
43
+
44
+ In `full` mode, our model will automatically process the croped region and paste back to the original image. Remember to use `--still` to keep the original head pose.
45
+
46
+ | input | `--still` | `--still` & `enhancer` |
47
+ |:--------------------: |:--------------------: | :--:|
48
+ | <img src='../examples/source_image/full_body_2.png' width='380'> | <img src='./example_full.gif' width='380'> | <img src='./example_full_enhanced.gif' width='380'>
49
+
50
+
51
+ ### About `--enhancer`
52
+
53
+ For higher resolution, we intergate [gfpgan](https://github.com/TencentARC/GFPGAN) and [real-esrgan](https://github.com/xinntao/Real-ESRGAN) for different purpose. Just adding `--enhancer <gfpgan or RestoreFormer>` or `--background_enhancer <realesrgan>` for the enhancement of the face and the full image.
54
+
55
+ ```bash
56
+ # make sure above packages are available:
57
+ pip install gfpgan
58
+ pip install realesrgan
59
+ ```
60
+
61
+ ### About `--face3dvis`
62
+
63
+ This flag indicate that we can generated the 3d-rendered face and it's 3d facial landmarks. More details can be founded [here](face3d.md).
64
+
65
+ | Input | Animated 3d face |
66
+ |:-------------: | :-------------: |
67
+ | <img src='../examples/source_image/art_0.png' width='200px'> | <video src="https://user-images.githubusercontent.com/4397546/226856847-5a6a0a4d-a5ec-49e2-9b05-3206db65e8e3.mp4"></video> |
68
+
69
+ > Kindly ensure to activate the audio as the default audio playing is incompatible with GitHub.
70
+
71
+
72
+
73
+ #### Reference eye-link mode.
74
+
75
+ | Input, w/ reference video , reference video |
76
+ |:-------------: |
77
+ | ![free_view](using_ref_video.gif)|
78
+ | If the reference video is shorter than the input audio, we will loop the reference video .
79
+
80
+
81
+
82
+ #### Generating 4D free-view talking examples from audio and a single image
83
+
84
+ We use `input_yaw`, `input_pitch`, `input_roll` to control head pose. For example, `--input_yaw -20 30 10` means the input head yaw degree changes from -20 to 30 and then changes from 30 to 10.
85
+ ```bash
86
+ python inference.py --driven_audio <audio.wav> \
87
+ --source_image <video.mp4 or picture.png> \
88
+ --result_dir <a file to store results> \
89
+ --input_yaw -20 30 10
90
+ ```
91
+
92
+ | Results, Free-view results, Novel view results |
93
+ |:-------------: |
94
+ | ![free_view](free_view_result.gif)|
docs/changlelog.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## changelogs
2
+
3
+
4
+ - __[2023.04.06]__: stable-diffiusion webui extension is release.
5
+
6
+ - __[2023.04.03]__: Enable TTS in huggingface and gradio local demo.
7
+
8
+ - __[2023.03.30]__: Launch beta version of the full body mode.
9
+
10
+ - __[2023.03.30]__: Launch new feature: through using reference videos, our algorithm can generate videos with more natural eye blinking and some eyebrow movement.
11
+
12
+ - __[2023.03.29]__: `resize mode` is online by `python infererence.py --preprocess resize`! Where we can produce a larger crop of the image as discussed in https://github.com/Winfredy/SadTalker/issues/35.
13
+
14
+ - __[2023.03.29]__: local gradio demo is online! `python app.py` to start the demo. New `requirments.txt` is used to avoid the bugs in `librosa`.
15
+
16
+ - __[2023.03.28]__: Online demo is launched in [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/vinthony/SadTalker), thanks AK!
17
+
18
+ - __[2023.03.22]__: Launch new feature: generating the 3d face animation from a single image. New applications about it will be updated.
19
+
20
+ - __[2023.03.22]__: Launch new feature: `still mode`, where only a small head pose will be produced via `python inference.py --still`.
21
+
22
+ - __[2023.03.18]__: Support `expression intensity`, now you can change the intensity of the generated motion: `python inference.py --expression_scale 1.3 (some value > 1)`.
23
+
24
+ - __[2023.03.18]__: Reconfig the data folders, now you can download the checkpoint automatically using `bash scripts/download_models.sh`.
25
+ - __[2023.03.18]__: We have offically integrate the [GFPGAN](https://github.com/TencentARC/GFPGAN) for face enhancement, using `python inference.py --enhancer gfpgan` for better visualization performance.
26
+ - __[2023.03.14]__: Specify the version of package `joblib` to remove the errors in using `librosa`, [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb) is online!
27
+ - __[2023.03.06]__: Solve some bugs in code and errors in installation
28
+ - __[2023.03.03]__: Release the test code for audio-driven single image animation!
29
+ - __[2023.02.28]__: SadTalker has been accepted by CVPR 2023!
docs/example_crop.gif ADDED

Git LFS Details

  • SHA256: da08306e3e6355928887e74057ee4221f9d877d8536341d907e29fe35e078b45
  • Pointer size: 132 Bytes
  • Size of remote file: 1.55 MB
docs/example_crop_still.gif ADDED

Git LFS Details

  • SHA256: 667c7531ed0a4d97a3ca9b15f79eea655b93dc40eda94498aa43b9e6a48c49aa
  • Pointer size: 132 Bytes
  • Size of remote file: 1.25 MB
docs/example_full.gif ADDED

Git LFS Details

  • SHA256: 2d1a2b8f5ed7b942a8625a5767828c1bc47568165a187079fbbb8492ed57301b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.46 MB
docs/example_full_crop.gif ADDED
docs/example_full_enhanced.gif ADDED

Git LFS Details

  • SHA256: 906ca893e72854021c7715f784dc3fe219bbe67b73ff461e6ba8374f0d3b4712
  • Pointer size: 132 Bytes
  • Size of remote file: 5.78 MB
docs/face3d.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 3D Face Visualization
2
+
3
+ We use `pytorch3d` to visualize the 3D faces from a single image.
4
+
5
+ The requirements for 3D visualization are difficult to install, so here's a tutorial:
6
+
7
+ ```bash
8
+ git clone https://github.com/OpenTalker/SadTalker.git
9
+ cd SadTalker
10
+ conda create -n sadtalker3d python=3.8
11
+ source activate sadtalker3d
12
+
13
+ conda install ffmpeg
14
+ conda install -c fvcore -c iopath -c conda-forge fvcore iopath
15
+ conda install libgcc gmp
16
+
17
+ pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
18
+
19
+ # insintall pytorch3d
20
+ pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1110/download.html
21
+
22
+ pip install -r requirements3d.txt
23
+
24
+ ### install gpfgan for enhancer
25
+ pip install git+https://github.com/TencentARC/GFPGAN
26
+
27
+
28
+ ### when occurs gcc version problem `from pytorch import _C` from pytorch3d, add the anaconda path to LD_LIBRARY_PATH
29
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/$YOUR_ANACONDA_PATH/lib/
30
+
31
+ ```
32
+
33
+ Then, generate the result via:
34
+
35
+ ```bash
36
+
37
+
38
+ python inference.py --driven_audio <audio.wav> \
39
+ --source_image <video.mp4 or picture.png> \
40
+ --result_dir <a file to store results> \
41
+ --face3dvis
42
+
43
+ ```
44
+
45
+ The results will appear, named `face3d.mp4`.
46
+
47
+ More applications about 3D face rendering will be released soon.
docs/free_view_result.gif ADDED

Git LFS Details

  • SHA256: 035a7fba6800964254728f82fec47fe5c91458183e19a7506dd54d89940af40f
  • Pointer size: 132 Bytes
  • Size of remote file: 5.61 MB
docs/install.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### macOS
2
+
3
+ This method has been tested on a M1 Mac (13.3)
4
+
5
+ ```bash
6
+ git clone https://github.com/OpenTalker/SadTalker.git
7
+ cd SadTalker
8
+ conda create -n sadtalker python=3.8
9
+ conda activate sadtalker
10
+ # install pytorch 2.0
11
+ pip install torch torchvision torchaudio
12
+ conda install ffmpeg
13
+ pip install -r requirements.txt
14
+ pip install dlib # macOS needs to install the original dlib.
15
+ ```
16
+
17
+ ### Windows Native
18
+
19
+ - Make sure you have `ffmpeg` in the `%PATH%` as suggested in [#54](https://github.com/Winfredy/SadTalker/issues/54), following [this](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/) tutorial to install `ffmpeg` or using scoop.
20
+
21
+
22
+ ### Windows WSL
23
+
24
+
25
+ - Make sure the environment: `export LD_LIBRARY_PATH=/usr/lib/wsl/lib:$LD_LIBRARY_PATH`
26
+
27
+
28
+ ### Docker Installation
29
+
30
+ A community Docker image by [@thegenerativegeneration](https://github.com/thegenerativegeneration) is available on the [Docker hub](https://hub.docker.com/repository/docker/wawa9000/sadtalker), which can be used directly:
31
+ ```bash
32
+ docker run --gpus "all" --rm -v $(pwd):/host_dir wawa9000/sadtalker \
33
+ --driven_audio /host_dir/deyu.wav \
34
+ --source_image /host_dir/image.jpg \
35
+ --expression_scale 1.0 \
36
+ --still \
37
+ --result_dir /host_dir
38
+ ```
39
+
docs/resize_good.gif ADDED

Git LFS Details

  • SHA256: ada6f2ea847e71c2a963882fd83f6b54193f4fe7c402f9f20698632b15bbdc0c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.73 MB
docs/resize_no.gif ADDED

Git LFS Details

  • SHA256: c7702f0be5c87c8977bf3c4a73ea4d27e90d0a5a3015816abb880cfd8f75c6ac
  • Pointer size: 132 Bytes
  • Size of remote file: 2.14 MB
docs/sadtalker_logo.png ADDED
docs/using_ref_video.gif ADDED

Git LFS Details

  • SHA256: 9bb68ae077a6c009e7d30a36d34c30bf1310a073ab3c7d9cc1b5c9abe285e888
  • Pointer size: 132 Bytes
  • Size of remote file: 8.11 MB
docs/webui_extension.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Run SadTalker as a Stable Diffusion WebUI Extension.
2
+
3
+ 1. Install the latest version of [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and install SadTalker via `extension`.
4
+ <img width="726" alt="image" src="https://user-images.githubusercontent.com/4397546/230698519-267d1d1f-6e99-4dd4-81e1-7b889259efbd.png">
5
+
6
+ 2. Download the checkpoints manually, for Linux and Mac:
7
+
8
+ ```bash
9
+
10
+ cd SOMEWHERE_YOU_LIKE
11
+
12
+ bash <(wget -qO- https://raw.githubusercontent.com/Winfredy/OpenTalker/main/scripts/download_models.sh)
13
+ ```
14
+
15
+ For Windows, you can download all the checkpoints [here](https://github.com/OpenTalker/SadTalker/tree/main#2-download-models).
16
+
17
+ 3.1. Option 1: put the checkpoint in `stable-diffusion-webui/models/SadTalker` or `stable-diffusion-webui/extensions/SadTalker/checkpoints/`, the checkpoints will be detected automatically.
18
+
19
+ 3.2. Option 2: Set the path of `SADTALKER_CHECKPOINTS` in `webui_user.sh` (Linux) or `webui_user.bat` (Windows) by:
20
+
21
+ > only works if you are directly starting webui from `webui_user.sh` or `webui_user.bat`.
22
+
23
+ ```bash
24
+ # Windows (webui_user.bat)
25
+ set SADTALKER_CHECKPOINTS=D:\SadTalker\checkpoints
26
+
27
+ # Linux/macOS (webui_user.sh)
28
+ export SADTALKER_CHECKPOINTS=/path/to/SadTalker/checkpoints
29
+ ```
30
+
31
+ 4. Start the WebUI via `webui.sh or webui_user.sh(linux)` or `webui_user.bat(windows)` or any other method. SadTalker can also be used in stable-diffusion-webui directly.
32
+
33
+ <img width="726" alt="image" src="https://user-images.githubusercontent.com/4397546/230698614-58015182-2916-4240-b324-e69022ef75b3.png">
34
+
35
+ ## Questions
36
+
37
+ 1. If you are running on CPU, you need to specify `--disable-safe-unpickle` in `webui_user.sh` or `webui_user.bat`.
38
+
39
+ ```bash
40
+ # windows (webui_user.bat)
41
+ set COMMANDLINE_ARGS="--disable-safe-unpickle"
42
+
43
+ # linux (webui_user.sh)
44
+ export COMMANDLINE_ARGS="--disable-safe-unpickle"
45
+ ```
46
+
47
+
48
+
49
+ (If you're unable to use the `full` mode, please read this [discussion](https://github.com/Winfredy/SadTalker/issues/78).)
examples/driven_audio/RD_Radio31_000.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/RD_Radio34_002.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/RD_Radio36_000.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/RD_Radio40_000.wav ADDED
Binary file (512 kB). View file
 
examples/driven_audio/bus_chinese.wav ADDED
Binary file (652 kB). View file
 
examples/driven_audio/chinese_news.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b0f4d313a1ca671bc4831d60bcf0c12225efbffe6c0e93e54fbfe9bcd4021cb
3
+ size 1536078
examples/driven_audio/chinese_poem1.wav ADDED
Binary file (263 kB). View file
 
examples/driven_audio/chinese_poem2.wav ADDED
Binary file (461 kB). View file
 
examples/driven_audio/deyu.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba1839c57770a2ab0b593ce814344bfd4d750da02acc9be9e8cf5b9113a0f88a
3
+ size 2694784
examples/driven_audio/eluosi.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a3593815dc7b68c256672baa61934c9479efa770af2065fb0886f02713606e
3
+ size 1786672
examples/driven_audio/fayu.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16ebd13626ae4171030b4ea05cceef06078483c352e4b68d469fc2a52bfffceb
3
+ size 1940428
examples/driven_audio/imagine.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db410217e074d91ae6011e1c5dc0b94f02d05d381c50af8e54253eeacad17d2
3
+ size 1618510
examples/driven_audio/itosinger1.wav ADDED
Binary file (500 kB). View file
 
examples/driven_audio/japanese.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3db5426d0b158799e2be4f609b11f75bfbd4affffe18e9a1c8e6f241fcdedcfc
3
+ size 2622712
examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85242c3fc4d50e2202cea393b9e7ee59019759b68e78e26a254d528c22615a7
3
+ size 2257667
examples/ref_video/WDA_KatieHill_000.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fbb4cfd64eedc49b170c441714a9c4fd5e2c2f8a11592070ad89fbd257f2817
3
+ size 3548230
examples/source_image/art_0.png ADDED
examples/source_image/art_1.png ADDED
examples/source_image/art_10.png ADDED
examples/source_image/art_11.png ADDED
examples/source_image/art_12.png ADDED
examples/source_image/art_13.png ADDED
examples/source_image/art_14.png ADDED
examples/source_image/art_15.png ADDED