zhiqing0205
Claude
committed on
Commit
·
ece7754
1
Parent(s):
d403caa
Add complete U2Net project with HuggingFace preparation
Browse files
- Add comprehensive .gitignore for Python projects
- Configure Git LFS for large files including:
- PyTorch model files (.pth): 338MB total
- Image datasets and results
- ZIP archives and media files
- Include all Python source code, documentation, and test data
- Ready for HuggingFace Hub deployment
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <[email protected]>
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +149 -0
- LICENSE +201 -0
- QUICKSTART.md +76 -0
- README.md +215 -3
- README_CN.md +196 -0
- README_HF.md +141 -0
- README_original.md +218 -0
- clipping_camera.jpg +3 -0
- data_loader.py +266 -0
- download_from_hf.py +96 -0
- fg_mask_mvtec_loco.zip +3 -0
- figures/3d-photo-re.jpg +3 -0
- figures/LensOCR.jpg +3 -0
- figures/U2NETPR.png +3 -0
- figures/U2Net_Logo.png +3 -0
- figures/art_transfer.JPG +3 -0
- figures/bg-removal.gif +3 -0
- figures/bg-rm-aug.gif +3 -0
- figures/clipping_camera.jpg +3 -0
- figures/close-seg.jpg +3 -0
- figures/gradio_web_demo.jpg +3 -0
- figures/hotpot.png +3 -0
- figures/human_seg.png +3 -0
- figures/human_seg_results.gif +3 -0
- figures/human_seg_video.gif +3 -0
- figures/im_composite.jpg +3 -0
- figures/lensto.png +3 -0
- figures/motor-demo.gif +3 -0
- figures/pixelmator.jpg +3 -0
- figures/portrait-ios-app.jpg +3 -0
- figures/portrait_kids.png +3 -0
- figures/portrait_ladies.png +3 -0
- figures/portrait_men.png +3 -0
- figures/profuai.png +3 -0
- figures/qual.png +3 -0
- figures/quan_1.png +3 -0
- figures/quan_2.png +3 -0
- figures/rembg.png +3 -0
- figures/rm_bg.JPG +3 -0
- figures/ship-demo.gif +3 -0
- figures/silueta.png +3 -0
- figures/sky-seg.png +3 -0
- figures/style-trans.JPG +3 -0
- figures/swift-u2net.jpeg +3 -0
- figures/u2net-best-paper.jpg +3 -0
- figures/u2netqual.png +3 -0
- figures/view-move.gif +3 -0
- figures/xuebin-demo.png +3 -0
- gradio/demo.py +37 -0
- model/__init__.py +2 -0
.gitignore
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
pip-wheel-metadata/
|
24 |
+
share/python-wheels/
|
25 |
+
*.egg-info/
|
26 |
+
.installed.cfg
|
27 |
+
*.egg
|
28 |
+
MANIFEST
|
29 |
+
|
30 |
+
# PyInstaller
|
31 |
+
# Usually these files are written by a python script from a template
|
32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
33 |
+
*.manifest
|
34 |
+
*.spec
|
35 |
+
|
36 |
+
# Installer logs
|
37 |
+
pip-log.txt
|
38 |
+
pip-delete-this-directory.txt
|
39 |
+
|
40 |
+
# Unit test / coverage reports
|
41 |
+
htmlcov/
|
42 |
+
.tox/
|
43 |
+
.nox/
|
44 |
+
.coverage
|
45 |
+
.coverage.*
|
46 |
+
.cache
|
47 |
+
nosetests.xml
|
48 |
+
coverage.xml
|
49 |
+
*.cover
|
50 |
+
*.py,cover
|
51 |
+
.hypothesis/
|
52 |
+
.pytest_cache/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
target/
|
76 |
+
|
77 |
+
# Jupyter Notebook
|
78 |
+
.ipynb_checkpoints
|
79 |
+
|
80 |
+
# IPython
|
81 |
+
profile_default/
|
82 |
+
ipython_config.py
|
83 |
+
|
84 |
+
# pyenv
|
85 |
+
.python-version
|
86 |
+
|
87 |
+
# pipenv
|
88 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
89 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
90 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
91 |
+
# install all needed dependencies.
|
92 |
+
#Pipfile.lock
|
93 |
+
|
94 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
95 |
+
__pypackages__/
|
96 |
+
|
97 |
+
# Celery stuff
|
98 |
+
celerybeat-schedule
|
99 |
+
celerybeat.pid
|
100 |
+
|
101 |
+
# SageMath parsed files
|
102 |
+
*.sage.py
|
103 |
+
|
104 |
+
# Environments
|
105 |
+
.env
|
106 |
+
.venv
|
107 |
+
env/
|
108 |
+
venv/
|
109 |
+
ENV/
|
110 |
+
env.bak/
|
111 |
+
venv.bak/
|
112 |
+
|
113 |
+
# Spyder project settings
|
114 |
+
.spyderproject
|
115 |
+
.spyproject
|
116 |
+
|
117 |
+
# Rope project settings
|
118 |
+
.ropeproject
|
119 |
+
|
120 |
+
# mkdocs documentation
|
121 |
+
/site
|
122 |
+
|
123 |
+
# mypy
|
124 |
+
.mypy_cache/
|
125 |
+
.dmypy.json
|
126 |
+
dmypy.json
|
127 |
+
|
128 |
+
# Pyre type checker
|
129 |
+
.pyre/
|
130 |
+
|
131 |
+
# OS generated files
|
132 |
+
.DS_Store
|
133 |
+
.DS_Store?
|
134 |
+
._*
|
135 |
+
.Spotlight-V100
|
136 |
+
.Trashes
|
137 |
+
ehthumbs.db
|
138 |
+
Thumbs.db
|
139 |
+
|
140 |
+
# IDE files
|
141 |
+
.vscode/
|
142 |
+
.idea/
|
143 |
+
*.swp
|
144 |
+
*.swo
|
145 |
+
*~
|
146 |
+
|
147 |
+
# Temporary files
|
148 |
+
*.tmp
|
149 |
+
*.temp
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
QUICKSTART.md
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 🚀 Quick Start Guide
|
2 |
+
|
3 |
+
## One-Command Setup
|
4 |
+
|
5 |
+
### Method 1: Using our setup script
|
6 |
+
```bash
|
7 |
+
# Download the setup script
|
8 |
+
curl -O https://huggingface.co/zhiqing0205/u2net-mvtec-loco-segmentation/raw/main/setup_project.py
|
9 |
+
|
10 |
+
# Run setup (downloads everything automatically)
|
11 |
+
python setup_project.py
|
12 |
+
|
13 |
+
# Use the project
|
14 |
+
cd u2net-mvtec-loco
|
15 |
+
python mvtec_loco_fg_segmentation.py
|
16 |
+
```
|
17 |
+
|
18 |
+
### Method 2: Using HuggingFace CLI
|
19 |
+
```bash
|
20 |
+
# Install HuggingFace CLI
|
21 |
+
pip install huggingface_hub
|
22 |
+
|
23 |
+
# Download complete project (equivalent to git clone)
|
24 |
+
huggingface-cli download zhiqing0205/u2net-mvtec-loco-segmentation \
|
25 |
+
--local-dir ./u2net-project --repo-type model
|
26 |
+
|
27 |
+
# Use the project
|
28 |
+
cd u2net-project
|
29 |
+
python mvtec_loco_fg_segmentation.py
|
30 |
+
```
|
31 |
+
|
32 |
+
### Method 3: Using Python
|
33 |
+
```bash
|
34 |
+
# One-liner to download everything
|
35 |
+
python -c "
|
36 |
+
from huggingface_hub import snapshot_download
|
37 |
+
snapshot_download('zhiqing0205/u2net-mvtec-loco-segmentation', local_dir='./u2net-project')
|
38 |
+
print('Done! cd u2net-project && python mvtec_loco_fg_segmentation.py')
|
39 |
+
"
|
40 |
+
```
|
41 |
+
|
42 |
+
## What Gets Downloaded
|
43 |
+
|
44 |
+
✅ Complete source code
|
45 |
+
✅ Pre-trained model weights (u2net.pth - 169MB)
|
46 |
+
✅ Documentation (English + Chinese)
|
47 |
+
✅ Example scripts and utilities
|
48 |
+
✅ Ready to run immediately
|
49 |
+
|
50 |
+
## File Structure After Download
|
51 |
+
```
|
52 |
+
u2net-mvtec-loco/
|
53 |
+
├── mvtec_loco_fg_segmentation.py # Main script
|
54 |
+
├── saved_models/
|
55 |
+
│ └── u2net/
|
56 |
+
│ └── u2net.pth # Pre-trained model (169MB)
|
57 |
+
├── model/ # Model architecture
|
58 |
+
├── data_loader.py # Data utilities
|
59 |
+
├── README.md # English docs
|
60 |
+
├── README_CN.md # Chinese docs
|
61 |
+
└── ...
|
62 |
+
```
|
63 |
+
|
64 |
+
## Immediate Usage
|
65 |
+
```bash
|
66 |
+
# Process entire MVTec LOCO dataset
|
67 |
+
python mvtec_loco_fg_segmentation.py
|
68 |
+
|
69 |
+
# Process specific categories
|
70 |
+
python mvtec_loco_fg_segmentation.py --categories breakfast_box
|
71 |
+
|
72 |
+
# Custom threshold
|
73 |
+
python mvtec_loco_fg_segmentation.py --threshold 0.3
|
74 |
+
```
|
75 |
+
|
76 |
+
That's it! 🎉
|
README.md
CHANGED
@@ -1,3 +1,215 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MVTec LOCO Foreground Segmentation Tool
|
2 |
+
|
3 |
+
**[中文版 README](README_CN.md)** | **[English README](README.md)**
|
4 |
+
|
5 |
+
This tool uses U²-Net for generating binary foreground masks from the MVTec LOCO anomaly detection dataset.
|
6 |
+
|
7 |
+
## Overview
|
8 |
+
|
9 |
+
The `mvtec_loco_fg_segmentation.py` script processes the entire MVTec LOCO dataset and generates binary foreground masks for all images. It uses the U²-Net model to perform salient object detection and converts the probability maps to binary masks.
|
10 |
+
|
11 |
+
## Features
|
12 |
+
|
13 |
+
- **Complete Dataset Processing**: Processes all categories (breakfast_box, screw_bag, juice_bottle, splicing_connectors, pushpins)
|
14 |
+
- **Flexible Structure**: Handles both test and train splits with all subdirectories (good, logical_anomalies, structural_anomalies)
|
15 |
+
- **Binary Mask Output**: Generates clean binary masks (0/255) in L mode (grayscale)
|
16 |
+
- **Configurable Parameters**: Customizable threshold, categories, splits, and processing options
|
17 |
+
- **GPU/CPU Support**: Automatic detection and utilization of available hardware
|
18 |
+
|
19 |
+
## Requirements
|
20 |
+
|
21 |
+
### Environment Setup
|
22 |
+
```bash
|
23 |
+
# Create conda environment
|
24 |
+
conda create -n u2net python=3.8 -y
|
25 |
+
conda activate u2net
|
26 |
+
|
27 |
+
# Install dependencies
|
28 |
+
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --index-url https://download.pytorch.org/whl/cu116
|
29 |
+
pip install opencv-python scikit-image matplotlib numpy pillow
|
30 |
+
```
|
31 |
+
|
32 |
+
### Model Weights
|
33 |
+
|
34 |
+
**Option 1: Automatic Download (Recommended)**
|
35 |
+
```bash
|
36 |
+
# Install HuggingFace Hub
|
37 |
+
pip install huggingface_hub
|
38 |
+
|
39 |
+
# The model will be automatically downloaded when you run the script
|
40 |
+
python mvtec_loco_fg_segmentation.py
|
41 |
+
```
|
42 |
+
|
43 |
+
**Option 2: Manual Download**
|
44 |
+
- Download `u2net.pth` (176.3 MB) from [GoogleDrive](https://drive.google.com/file/d/1ao1ovG1Qtx4b7EoskHXmi2E9rp5CHLcZ/view)
|
45 |
+
- Place it in: `./saved_models/u2net/u2net.pth`
|
46 |
+
|
47 |
+
**Option 3: Download from HuggingFace**
|
48 |
+
```bash
|
49 |
+
# Download only the model
|
50 |
+
python download_from_hf.py --model-only
|
51 |
+
|
52 |
+
# Or download the complete repository
|
53 |
+
python download_from_hf.py --complete-repo
|
54 |
+
```
|
55 |
+
|
56 |
+
### Dataset Structure
|
57 |
+
Ensure your MVTec LOCO dataset follows this structure:
|
58 |
+
```
|
59 |
+
mvtec_loco_anomaly_detection/
|
60 |
+
├── breakfast_box/
|
61 |
+
│ ├── test/
|
62 |
+
│ │ ├── good/
|
63 |
+
│ │ ├── logical_anomalies/
|
64 |
+
│ │ └── structural_anomalies/
|
65 |
+
│ └── train/
|
66 |
+
│ └── good/
|
67 |
+
├── screw_bag/
|
68 |
+
│ ├── test/
|
69 |
+
│ └── train/
|
70 |
+
└── ... (other categories)
|
71 |
+
```
|
72 |
+
|
73 |
+
## Usage
|
74 |
+
|
75 |
+
### Basic Usage
|
76 |
+
```bash
|
77 |
+
# Process entire dataset with default settings
|
78 |
+
python mvtec_loco_fg_segmentation.py
|
79 |
+
|
80 |
+
# Show help
|
81 |
+
python mvtec_loco_fg_segmentation.py -h
|
82 |
+
```
|
83 |
+
|
84 |
+
### Advanced Usage
|
85 |
+
|
86 |
+
```bash
|
87 |
+
# Specify custom dataset and model paths
|
88 |
+
python mvtec_loco_fg_segmentation.py \
|
89 |
+
--dataset_path /path/to/mvtec_loco \
|
90 |
+
--model_path /path/to/u2net.pth
|
91 |
+
|
92 |
+
# Process specific categories only
|
93 |
+
python mvtec_loco_fg_segmentation.py \
|
94 |
+
--categories breakfast_box juice_bottle
|
95 |
+
|
96 |
+
# Process only test split
|
97 |
+
python mvtec_loco_fg_segmentation.py \
|
98 |
+
--splits test
|
99 |
+
|
100 |
+
# Use different threshold for binary mask generation
|
101 |
+
python mvtec_loco_fg_segmentation.py \
|
102 |
+
--threshold 0.3
|
103 |
+
|
104 |
+
# Custom output directory name
|
105 |
+
python mvtec_loco_fg_segmentation.py \
|
106 |
+
--output_dir custom_masks
|
107 |
+
|
108 |
+
# Optimize processing with multiple workers
|
109 |
+
python mvtec_loco_fg_segmentation.py \
|
110 |
+
--num_workers 4 \
|
111 |
+
--batch_size 4
|
112 |
+
```
|
113 |
+
|
114 |
+
## Command Line Arguments
|
115 |
+
|
116 |
+
| Argument | Type | Default | Description |
|
117 |
+
|----------|------|---------|-------------|
|
118 |
+
| `--dataset_path` | str | `/root/hy-data/datasets/mvtec_loco_anomaly_detection` | Path to MVTec LOCO dataset root |
|
119 |
+
| `--model_path` | str | `./saved_models/u2net/u2net.pth` | Path to U2NET model weights |
|
120 |
+
| `--output_dir` | str | `fg_mask` | Output directory name for masks |
|
121 |
+
| `--threshold` | float | `0.5` | Threshold for binary mask generation |
|
122 |
+
| `--categories` | list | `all 5 categories` | Categories to process |
|
123 |
+
| `--splits` | list | `['test', 'train']` | Dataset splits to process |
|
124 |
+
| `--batch_size` | int | `1` | Batch size for processing |
|
125 |
+
| `--num_workers` | int | `1` | Number of data loading workers |
|
126 |
+
|
127 |
+
## Output Structure
|
128 |
+
|
129 |
+
The script generates masks in the following structure:
|
130 |
+
```
|
131 |
+
mvtec_loco_anomaly_detection/
|
132 |
+
├── fg_mask/ # Generated masks directory
|
133 |
+
│ ├── breakfast_box/
|
134 |
+
│ │ ├── test/
|
135 |
+
│ │ │ ├── good/
|
136 |
+
│ │ │ │ ├── 000.png # Binary mask (0/255 values)
|
137 |
+
│ │ │ │ ├── 001.png
|
138 |
+
│ │ │ │ └── ...
|
139 |
+
│ │ │ ├── logical_anomalies/
|
140 |
+
│ │ │ └── structural_anomalies/
|
141 |
+
│ │ └── train/
|
142 |
+
│ │ └── good/
|
143 |
+
│ └── ... (other categories)
|
144 |
+
└── ... (original dataset)
|
145 |
+
```
|
146 |
+
|
147 |
+
## Mask Properties
|
148 |
+
|
149 |
+
- **Format**: PNG images
|
150 |
+
- **Mode**: L (grayscale, single channel)
|
151 |
+
- **Values**: Binary (0 for background, 255 for foreground)
|
152 |
+
- **Size**: Same as original images
|
153 |
+
- **Threshold**: Configurable (default 0.5)
|
154 |
+
|
155 |
+
## Performance Notes
|
156 |
+
|
157 |
+
- **GPU Recommended**: Processing is significantly faster with a CUDA-enabled GPU
|
158 |
+
- **Memory Usage**: Each image requires ~200MB GPU memory during processing
|
159 |
+
- **Processing Time**: ~2-3 seconds per image on a modern GPU
|
160 |
+
- **Total Images**: ~5000+ images in complete dataset
|
161 |
+
|
162 |
+
## Troubleshooting
|
163 |
+
|
164 |
+
### Common Issues
|
165 |
+
|
166 |
+
1. **CUDA Out of Memory**: Reduce batch size or use CPU processing
|
167 |
+
2. **Model Not Found**: Ensure `u2net.pth` is in the correct directory (`./saved_models/u2net/`)
|
168 |
+
3. **Dataset Path Error**: Verify MVTec LOCO dataset structure
|
169 |
+
4. **Permission Errors**: Check write permissions for output directory
|
170 |
+
|
171 |
+
### Error Messages
|
172 |
+
|
173 |
+
- `ERROR: Dataset path not found`: Check dataset path and extraction
|
174 |
+
- `ERROR: Model path not found`: Download and place u2net.pth correctly
|
175 |
+
- `ERROR: Invalid categories`: Use valid category names
|
176 |
+
|
177 |
+
## Example Output
|
178 |
+
|
179 |
+
The script provides detailed progress information:
|
180 |
+
```
|
181 |
+
Configuration:
|
182 |
+
Dataset path: /root/hy-data/datasets/mvtec_loco_anomaly_detection
|
183 |
+
Model path: ./saved_models/u2net/u2net.pth
|
184 |
+
Output directory: fg_mask
|
185 |
+
Binary threshold: 0.5
|
186 |
+
Categories: ['breakfast_box', 'screw_bag', 'juice_bottle', 'splicing_connectors', 'pushpins']
|
187 |
+
Splits: ['test', 'train']
|
188 |
+
|
189 |
+
...load U2NET---
|
190 |
+
Processing category: breakfast_box
|
191 |
+
Processing breakfast_box/test/good
|
192 |
+
Found 102 images
|
193 |
+
Processing 1/102: 000.png
|
194 |
+
Processing 20/102: 019.png
|
195 |
+
...
|
196 |
+
```
|
197 |
+
|
198 |
+
## Citation
|
199 |
+
|
200 |
+
If you use this tool in your research, please cite the original U²-Net paper:
|
201 |
+
|
202 |
+
```bibtex
|
203 |
+
@article{Qin_2020_PR,
|
204 |
+
title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection},
|
205 |
+
author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin},
|
206 |
+
journal = {Pattern Recognition},
|
207 |
+
volume = {106},
|
208 |
+
pages = {107404},
|
209 |
+
year = {2020}
|
210 |
+
}
|
211 |
+
```
|
212 |
+
|
213 |
+
## License
|
214 |
+
|
215 |
+
This tool extends the original U²-Net implementation. See the `LICENSE` file (Apache License 2.0) in this repository, and refer to the original repository for further license information.
|
README_CN.md
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MVTec LOCO 前景分割工具
|
2 |
+
|
3 |
+
**[中文版 README](README_CN.md)** | **[English README](README.md)**
|
4 |
+
|
5 |
+
本工具使用 U²-Net 为 MVTec LOCO 异常检测数据集生成二值前景掩码。
|
6 |
+
|
7 |
+
## 概述
|
8 |
+
|
9 |
+
`mvtec_loco_fg_segmentation.py` 脚本处理整个 MVTec LOCO 数据集,为所有图像生成二值前景掩码。它使用 U²-Net 模型执行显著性目标检测,并将概率图转换为二值掩码。
|
10 |
+
|
11 |
+
## 功能特点
|
12 |
+
|
13 |
+
- **完整数据集处理**:处理所有类别(breakfast_box, screw_bag, juice_bottle, splicing_connectors, pushpins)
|
14 |
+
- **灵活的结构支持**:处理 test 和 train 分割中的所有子目录(good, logical_anomalies, structural_anomalies)
|
15 |
+
- **二值掩码输出**:生成标准的二值掩码(0/255),使用 L 模式(灰度)
|
16 |
+
- **参数可配置**:可自定义阈值、类别、分割和处理选项
|
17 |
+
- **GPU/CPU 支持**:自动检测并利用可用硬件
|
18 |
+
|
19 |
+
## 环境要求
|
20 |
+
|
21 |
+
### 环境设置
|
22 |
+
```bash
|
23 |
+
# 创建 conda 环境
|
24 |
+
conda create -n u2net python=3.8 -y
|
25 |
+
conda activate u2net
|
26 |
+
|
27 |
+
# 安装依赖
|
28 |
+
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --index-url https://download.pytorch.org/whl/cu116
|
29 |
+
pip install opencv-python scikit-image matplotlib numpy pillow
|
30 |
+
```
|
31 |
+
|
32 |
+
### 模型权重
|
33 |
+
下载 U²-Net 模型权重并放置到正确目录:
|
34 |
+
- 从 [GoogleDrive](https://drive.google.com/file/d/1ao1ovG1Qtx4b7EoskHXmi2E9rp5CHLcZ/view) 下载 `u2net.pth`(176.3 MB)
|
35 |
+
- 放置到:`./saved_models/u2net/u2net.pth`
|
36 |
+
|
37 |
+
### 数据集结构
|
38 |
+
确保您的 MVTec LOCO 数据集遵循以下结构:
|
39 |
+
```
|
40 |
+
mvtec_loco_anomaly_detection/
|
41 |
+
├── breakfast_box/
|
42 |
+
│ ├── test/
|
43 |
+
│ │ ├── good/
|
44 |
+
│ │ ├── logical_anomalies/
|
45 |
+
│ │ └── structural_anomalies/
|
46 |
+
│ └── train/
|
47 |
+
│ └── good/
|
48 |
+
├── screw_bag/
|
49 |
+
│ ├── test/
|
50 |
+
│ └── train/
|
51 |
+
└── ... (其他类别)
|
52 |
+
```
|
53 |
+
|
54 |
+
## 使用方法
|
55 |
+
|
56 |
+
### 基本用法
|
57 |
+
```bash
|
58 |
+
# 使用默认设置处理整个数据集
|
59 |
+
python mvtec_loco_fg_segmentation.py
|
60 |
+
|
61 |
+
# 显示帮助信息
|
62 |
+
python mvtec_loco_fg_segmentation.py -h
|
63 |
+
```
|
64 |
+
|
65 |
+
### 高级用法
|
66 |
+
|
67 |
+
```bash
|
68 |
+
# 指定自定义数据集和模型路径
|
69 |
+
python mvtec_loco_fg_segmentation.py \
|
70 |
+
--dataset_path /path/to/mvtec_loco \
|
71 |
+
--model_path /path/to/u2net.pth
|
72 |
+
|
73 |
+
# 仅处理特定类别
|
74 |
+
python mvtec_loco_fg_segmentation.py \
|
75 |
+
--categories breakfast_box juice_bottle
|
76 |
+
|
77 |
+
# 仅处理测试集
|
78 |
+
python mvtec_loco_fg_segmentation.py \
|
79 |
+
--splits test
|
80 |
+
|
81 |
+
# 使用不同的二值化阈值
|
82 |
+
python mvtec_loco_fg_segmentation.py \
|
83 |
+
--threshold 0.3
|
84 |
+
|
85 |
+
# 自定义输出目录名称
|
86 |
+
python mvtec_loco_fg_segmentation.py \
|
87 |
+
--output_dir custom_masks
|
88 |
+
|
89 |
+
# 使用多进程优化处理速度
|
90 |
+
python mvtec_loco_fg_segmentation.py \
|
91 |
+
--num_workers 4 \
|
92 |
+
--batch_size 4
|
93 |
+
```
|
94 |
+
|
95 |
+
## 命令行参数
|
96 |
+
|
97 |
+
| 参数 | 类型 | 默认值 | 说明 |
|
98 |
+
|------|------|--------|------|
|
99 |
+
| `--dataset_path` | str | `/root/hy-data/datasets/mvtec_loco_anomaly_detection` | MVTec LOCO 数据集根目录路径 |
|
100 |
+
| `--model_path` | str | `./saved_models/u2net/u2net.pth` | U2NET 模型权重文件路径 |
|
101 |
+
| `--output_dir` | str | `fg_mask` | 生成掩码的输出目录名称 |
|
102 |
+
| `--threshold` | float | `0.5` | 二值掩码生成阈值 |
|
103 |
+
| `--categories` | list | `所有5个类别` | 要处理的类别 |
|
104 |
+
| `--splits` | list | `['test', 'train']` | 要处理的数据集分割 |
|
105 |
+
| `--batch_size` | int | `1` | 处理批次大小 |
|
106 |
+
| `--num_workers` | int | `1` | 数据加载工作进程数 |
|
107 |
+
|
108 |
+
## 输出结构
|
109 |
+
|
110 |
+
脚本生成以下结构的掩码:
|
111 |
+
```
|
112 |
+
mvtec_loco_anomaly_detection/
|
113 |
+
├── fg_mask/ # 生成的掩码目录
|
114 |
+
│ ├── breakfast_box/
|
115 |
+
│ │ ├── test/
|
116 |
+
│ │ │ ├── good/
|
117 |
+
│ │ │ │ ├── 000.png # 二值掩码(0/255 值)
|
118 |
+
│ │ │ │ ├── 001.png
|
119 |
+
│ │ │ │ └── ...
|
120 |
+
│ │ │ ├── logical_anomalies/
|
121 |
+
│ │ │ └── structural_anomalies/
|
122 |
+
│ │ └── train/
|
123 |
+
│ │ └── good/
|
124 |
+
│ └── ... (其他类别)
|
125 |
+
└── ... (原始数据集)
|
126 |
+
```
|
127 |
+
|
128 |
+
## 掩码属性
|
129 |
+
|
130 |
+
- **格式**:PNG 图像
|
131 |
+
- **模式**:L(灰度,单通道)
|
132 |
+
- **值**:二值(背景为0,前景为255)
|
133 |
+
- **尺寸**:与原始图像相同
|
134 |
+
- **阈值**:可配置(默认0.5)
|
135 |
+
|
136 |
+
## 性能说明
|
137 |
+
|
138 |
+
- **推荐GPU**:使用支持CUDA的GPU处理速度显著更快
|
139 |
+
- **内存使用**:每张图像在处理期间需要约200MB GPU内存
|
140 |
+
- **处理时间**:现代GPU上每张图像约2-3秒
|
141 |
+
- **总图像数**:完整数据集中约5000+张图像
|
142 |
+
|
143 |
+
## 故障排除
|
144 |
+
|
145 |
+
### 常见问题
|
146 |
+
|
147 |
+
1. **CUDA内存不足**:减少批次大小或使用CPU处理
|
148 |
+
2. **找不到模型**:确保u2net.pth在正确目录中
|
149 |
+
3. **数据集路径错误**:验证MVTec LOCO数据集结构
|
150 |
+
4. **权限错误**:检查输出目录的写入权限
|
151 |
+
|
152 |
+
### 错误信息
|
153 |
+
|
154 |
+
- `ERROR: Dataset path not found`:检查数据集路径以及数据集是否已正确解压
|
155 |
+
- `ERROR: Model path not found`:正确下载并放置u2net.pth
|
156 |
+
- `ERROR: Invalid categories`:使用有效的类别名称
|
157 |
+
|
158 |
+
## 示例输出
|
159 |
+
|
160 |
+
脚本提供详细的进度信息:
|
161 |
+
```
|
162 |
+
配置信息:
|
163 |
+
数据集路径: /root/hy-data/datasets/mvtec_loco_anomaly_detection
|
164 |
+
模型路径: ./saved_models/u2net/u2net.pth
|
165 |
+
输出目录: fg_mask
|
166 |
+
二值化阈值: 0.5
|
167 |
+
类别: ['breakfast_box', 'screw_bag', 'juice_bottle', 'splicing_connectors', 'pushpins']
|
168 |
+
分割: ['test', 'train']
|
169 |
+
|
170 |
+
...加载 U2NET---
|
171 |
+
处理类别: breakfast_box
|
172 |
+
处理 breakfast_box/test/good
|
173 |
+
找到 102 张图像
|
174 |
+
处理 1/102: 000.png
|
175 |
+
处理 20/102: 019.png
|
176 |
+
...
|
177 |
+
```
|
178 |
+
|
179 |
+
## 引用
|
180 |
+
|
181 |
+
如果您在研究中使用此工具,请引用原始的 U²-Net 论文:
|
182 |
+
|
183 |
+
```bibtex
|
184 |
+
@InProceedings{Qin_2020_PR,
|
185 |
+
title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection},
|
186 |
+
author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin},
|
187 |
+
journal = {Pattern Recognition},
|
188 |
+
volume = {106},
|
189 |
+
pages = {107404},
|
190 |
+
year = {2020}
|
191 |
+
}
|
192 |
+
```
|
193 |
+
|
194 |
+
## 许可证
|
195 |
+
|
196 |
+
本工具扩展了原始 U²-Net 实现。请参考原始仓库的许可证信息。
|
README_HF.md
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: U²-Net MVTec LOCO Foreground Segmentation
|
3 |
+
tags:
|
4 |
+
- computer-vision
|
5 |
+
- image-segmentation
|
6 |
+
- anomaly-detection
|
7 |
+
- u2net
|
8 |
+
- mvtec-loco
|
9 |
+
- pytorch
|
10 |
+
license: apache-2.0
|
11 |
+
language: en
|
12 |
+
library_name: pytorch
|
13 |
+
---
|
14 |
+
|
15 |
+
# U²-Net MVTec LOCO Foreground Segmentation
|
16 |
+
|
17 |
+
This repository contains a complete tool for generating binary foreground masks from the MVTec LOCO anomaly detection dataset using U²-Net.
|
18 |
+
|
19 |
+
## 🚀 Quick Start
|
20 |
+
|
21 |
+
### Installation
|
22 |
+
```bash
|
23 |
+
# Clone from HuggingFace
|
24 |
+
git clone https://huggingface.co/zhiqing0205/u2net-mvtec-loco-segmentation
|
25 |
+
cd u2net-mvtec-loco-segmentation
|
26 |
+
|
27 |
+
# Install dependencies
|
28 |
+
pip install torch torchvision opencv-python scikit-image matplotlib numpy pillow huggingface_hub
|
29 |
+
|
30 |
+
# Run segmentation (model auto-downloads)
|
31 |
+
python mvtec_loco_fg_segmentation.py
|
32 |
+
```
|
33 |
+
|
34 |
+
### Download Options
|
35 |
+
|
36 |
+
**Option 1: Auto-download (Recommended)**
|
37 |
+
```python
|
38 |
+
from download_from_hf import download_u2net_model
|
39 |
+
|
40 |
+
# Download model only
|
41 |
+
download_u2net_model()
|
42 |
+
|
43 |
+
# Download complete repo
|
44 |
+
from download_from_hf import download_complete_repo
|
45 |
+
download_complete_repo()
|
46 |
+
```
|
47 |
+
|
48 |
+
**Option 2: Manual download**
|
49 |
+
```bash
|
50 |
+
python download_from_hf.py --model-only
|
51 |
+
# or
|
52 |
+
python download_from_hf.py --complete-repo
|
53 |
+
```
|
54 |
+
|
55 |
+
## 📁 Repository Contents
|
56 |
+
|
57 |
+
```
|
58 |
+
├── mvtec_loco_fg_segmentation.py # Main segmentation script
|
59 |
+
├── download_from_hf.py # HuggingFace download utility
|
60 |
+
├── model/ # U2NET model architecture
|
61 |
+
├── data_loader.py # Data loading utilities
|
62 |
+
├── saved_models/
|
63 |
+
│ └── u2net/
|
64 |
+
│ └── u2net.pth # Pre-trained U2NET weights (169MB)
|
65 |
+
├── README.md # English documentation
|
66 |
+
├── README_CN.md # Chinese documentation
|
67 |
+
└── ...
|
68 |
+
```
|
69 |
+
|
70 |
+
## 🎯 Features
|
71 |
+
|
72 |
+
- **Complete Dataset Processing**: All MVTec LOCO categories
|
73 |
+
- **Binary Mask Output**: Standard 0/255 masks in grayscale
|
74 |
+
- **GPU/CPU Support**: Automatic hardware detection
|
75 |
+
- **Configurable Parameters**: Threshold, categories, splits
|
76 |
+
- **Auto-download**: No manual model download needed
|
77 |
+
|
78 |
+
## 💻 Usage
|
79 |
+
|
80 |
+
### Basic Usage
|
81 |
+
```bash
|
82 |
+
python mvtec_loco_fg_segmentation.py
|
83 |
+
```
|
84 |
+
|
85 |
+
### Advanced Usage
|
86 |
+
```bash
|
87 |
+
# Custom parameters
|
88 |
+
python mvtec_loco_fg_segmentation.py \
|
89 |
+
--threshold 0.3 \
|
90 |
+
--categories breakfast_box juice_bottle \
|
91 |
+
--splits test
|
92 |
+
|
93 |
+
# Show all options
|
94 |
+
python mvtec_loco_fg_segmentation.py -h
|
95 |
+
```
|
96 |
+
|
97 |
+
## 📊 Model Information
|
98 |
+
|
99 |
+
- **Architecture**: U²-Net (U Square Net)
|
100 |
+
- **Model Size**: 169MB
|
101 |
+
- **Input Size**: 320×320 (auto-resized)
|
102 |
+
- **Output**: Binary masks (0/255)
|
103 |
+
- **Task**: Salient object detection → Foreground segmentation
|
104 |
+
|
105 |
+
## 🏷️ Supported Categories
|
106 |
+
|
107 |
+
- `breakfast_box`
|
108 |
+
- `screw_bag`
|
109 |
+
- `juice_bottle`
|
110 |
+
- `splicing_connectors`
|
111 |
+
- `pushpins`
|
112 |
+
|
113 |
+
## 📈 Performance
|
114 |
+
|
115 |
+
- **GPU Processing**: ~2-3 seconds per image
|
116 |
+
- **CPU Processing**: ~10-15 seconds per image
|
117 |
+
- **Memory Usage**: ~200MB GPU memory per image
|
118 |
+
- **Total Dataset**: ~5000+ images
|
119 |
+
|
120 |
+
## 📖 Citation
|
121 |
+
|
122 |
+
```bibtex
|
123 |
+
@InProceedings{Qin_2020_PR,
|
124 |
+
title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection},
|
125 |
+
author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin},
|
126 |
+
journal = {Pattern Recognition},
|
127 |
+
volume = {106},
|
128 |
+
pages = {107404},
|
129 |
+
year = {2020}
|
130 |
+
}
|
131 |
+
```
|
132 |
+
|
133 |
+
## 📜 License
|
134 |
+
|
135 |
+
Apache-2.0 License
|
136 |
+
|
137 |
+
## 🔗 Links
|
138 |
+
|
139 |
+
- [Original U²-Net Paper](https://arxiv.org/pdf/2005.09007.pdf)
|
140 |
+
- [MVTec LOCO Dataset](https://www.mvtec.com/company/research/datasets/mvtec-loco)
|
141 |
+
- [GitHub Repository](https://github.com/NathanUA/U-2-Net)
|
README_original.md
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<p align="center">
|
2 |
+
<img width="320" height="320" src="figures/U2Net_Logo.png">
|
3 |
+
|
4 |
+
<h1 align="center">U<sup>2</sup>-Net: U Square Net</h1>
|
5 |
+
|
6 |
+
</p>
|
7 |
+
|
8 |
+
This is the official repo for our paper **U<sup>2</sup>-Net(U square net)** published in Pattern Recognition 2020:
|
9 |
+
|
10 |
+
## [U<sup>2</sup>-Net: Going Deeper with Nested U-Structure for Salient Object Detection](https://arxiv.org/pdf/2005.09007.pdf)
|
11 |
+
[Xuebin Qin](https://xuebinqin.github.io/), [Zichen Zhang](https://webdocs.cs.ualberta.ca/~zichen2/), [Chenyang Huang](https://chenyangh.com/), [Masood Dehghan](https://sites.google.com/view/masooddehghan), [Osmar R. Zaiane](http://webdocs.cs.ualberta.ca/~zaiane/) and [Martin Jagersand](https://webdocs.cs.ualberta.ca/~jag/)
|
12 |
+
|
13 |
+
|
14 |
+
__Contact__: xuebin[at]ualberta[dot]ca
|
15 |
+
|
16 |
+
## Updates !!!
|
17 |
+
|
18 |
+
** (2022-Aug.-24) ** We are glad to announce that our U<sup>2</sup>-Net published in Pattern Recognition has been awarded the 2020 Pattern Recognition BEST PAPER AWARD !!!
|
19 |
+

|
20 |
+
|
21 |
+
** (2022-Aug.-17) **
|
22 |
+
Our U<sup>2</sup>-Net models are now available on [PlayTorch](https://playtorch.dev/), where you can build your own demo and run it on your Android/iOS phone. Try out this demo on [](https://playtorch.dev/snack/@playtorch/u2net/) and bring your ideas about U<sup>2</sup>-Net to truth in minutes!
|
23 |
+
|
24 |
+
**(2022-Jul.-5)** Our new work **Highly Accurate Dichotomous Image Segmentation (DIS)** ([**Project Page**](https://xuebinqin.github.io/dis/index.html), [**Github**](https://github.com/xuebinqin/DIS)) has been accepted by ECCV 2022. Our code and dataset will be released before July 17th, 2022. Please be aware of our updates.
|
25 |
+

|
26 |
+

|
27 |
+

|
28 |
+

|
29 |
+
|
30 |
+
** (2022-Jun.-3)** Thank [**Adir Kol**](https://github.com/adirkol) for sharing the iOS App [**3D Photo Creator**](https://apps.apple.com/us/app/3d-photo-creator/id1619676262) based on our U<sup>2</sup>-Net.
|
31 |
+

|
32 |
+
|
33 |
+
** (2022-Mar.-31)** Thank [**Hikaru Tsuyumine**] for implementing the iOS App [**Portrait Drawing**](https://apps.apple.com/us/app/portrait-drawing/id1623269600) based on our U<sup>2</sup>-Net portrait generation model.
|
34 |
+

|
35 |
+
|
36 |
+
** (2022-Apr.-12)** Thank [**Kevin Shah**](https://github.com/ioskevinshah) for providing us a great iOS App [**Lensto**](https://apps.apple.com/in/app/lensto-background-changer/id1574844033), ([**Demo Video**](https://www.youtube.com/shorts/jWwUiKZjfok)), based on U<sup>2</sup>-Net.
|
37 |
+

|
38 |
+
|
39 |
+
** (2022-Mar.-31)** Our U<sup>2</sup>-Net model is also integrated by [**Hotpot.ai**](https://hotpot.ai/) for art design.
|
40 |
+

|
41 |
+
|
42 |
+
** (2022-Mar-19)** Thank [**Kikedao**](https://github.com/Kikedao) for providing a fantastic webapp [**Silueta**](https://silueta.me/) based on U<sup>2</sup>-Net. More details can be found at [**https://github.com/xuebinqin/U-2-Net/issues/295**](https://github.com/xuebinqin/U-2-Net/issues/295).
|
43 |
+

|
44 |
+
|
45 |
+
** (2022-Mar-17)** Thank [**Ezaldeen Sahb**](https://github.com/Ezaldeen99/BackgroundRemoval) for implementing the iOS library for image background removal based on U<sup>2</sup>-Net, which will greatly facilitate the developing of mobile apps.
|
46 |
+

|
47 |
+
|
48 |
+
<!-- ** (2022-Mar-10)** Thank [**Doron Adler**](https://github.com/Norod/U-2-Net-StyleTransfer) for training the awesome style transfer U<sup>2</sup>-Net.
|
49 |
+
 -->
|
50 |
+
|
51 |
+
** (2022-Mar-8)** Thank [**Levin Dabhi**](https://github.com/levindabhi/cloth-segmentation) for training the amazing clothes segmentation U<sup>2</sup>-Net.
|
52 |
+

|
53 |
+
|
54 |
+
** (2022-Mar-3)** Thank [**Renato Violin**](https://github.com/renatoviolin/bg-remove-augment) for providing an awesome webapp for image background removal and replacement based on our U<sup>2</sup>-Net.
|
55 |
+

|
56 |
+
|
57 |
+
**(2021-Dec-21)** This [**blog**](https://rockyshikoku.medium.com/u2net-to-coreml-machine-learning-segmentation-on-iphone-eac0c721d67b) clearly describes the way of converting U<sup>2</sup>-Net to [**CoreML**](https://github.com/john-rocky/CoreML-Models) and running it on iphone.
|
58 |
+
|
59 |
+
**(2021-Nov-28)** Interesting Sky Segmentation models developed by [**xiongzhu**](https://github.com/xiongzhu666/Sky-Segmentation-and-Post-processing) using U<sup>2</sup>-Net.
|
60 |
+
|
61 |
+

|
62 |
+
|
63 |
+
**(2021-Nov-28)** Awesome image editing app [**Pixelmator pro**](https://www.pixelmator.com/pro/) uses U<sup>2</sup>-Net as one of its background removal models.
|
64 |
+
|
65 |
+

|
66 |
+
|
67 |
+
**(2021-Aug-24)** We experimented a bit more with fusing the original image and the generated portraits to composite different styles. You can <br/>
|
68 |
+
(1) Download this repo by
|
69 |
+
```
|
70 |
+
git clone https://github.com/NathanUA/U-2-Net.git
|
71 |
+
```
|
72 |
+
(2) Download the u2net_portrait.pth from [**GoogleDrive**](https://drive.google.com/file/d/1IG3HdpcRiDoWNookbncQjeaPN28t90yW/view?usp=sharing) or [**Baidu Pan(提取码:chgd)**](https://pan.baidu.com/s/1BYT5Ts6BxwpB8_l2sAyCkw)model and put it into the directory: ```./saved_models/u2net_portrait/```, <br/>
|
73 |
+
(3) run the code by command
|
74 |
+
```
|
75 |
+
python u2net_portrait_composite.py -s 20 -a 0.5
|
76 |
+
```
|
77 |
+
where ``-s`` indicates the sigma of the Gaussian function for blurring the original image and ``-a`` denotes the alpha weight of the original image when fusing them. <br/>
|
78 |
+
|
79 |
+

|
80 |
+
|
81 |
+
**(2021-July-16)** A new [background removal webapp](https://remove-background.net/) developed by Изатоп Василий.
|
82 |
+
|
83 |
+

|
84 |
+
|
85 |
+
**(2021-May-26)** Thank [**Dang Quoc Quy**](https://github.com/quyvsquy) for his [**Art Transfer APP**](https://play.google.com/store/apps/details?id=com.quyvsquy.arttransfer) built upon U<sup>2</sup>-Net.
|
86 |
+
|
87 |
+
<!------>
|
88 |
+
|
89 |
+
**(2021-May-5)** Thank [**AK391**](https://github.com/AK391) for sharing his [**Gradio Web Demo of U<sup>2</sup>-Net**](https://gradio.app/hub/AK391/U-2-Net).
|
90 |
+
|
91 |
+

|
92 |
+
|
93 |
+
|
94 |
+
**(2021-Apr-29)** Thanks [**Jonathan Benavides Vallejo**](https://www.linkedin.com/in/jonathanbv/) for releasing his App [**LensOCR: Extract Text & Image**](https://apps.apple.com/ch/app/lensocr-extract-text-image/id1549961729?l=en&mt=12), which uses U<sup>2</sup>-Net for extracting the image foreground.
|
95 |
+
|
96 |
+

|
97 |
+
|
98 |
+
**(2021-Apr-18)** Thanks [**Andrea Scuderi**](https://www.linkedin.com/in/andreascuderi/) for releasing his App [**Clipping Camera**](https://apps.apple.com/us/app/clipping-camera/id1548192169?ign-mpt=uo%3D2), which is an U<sup>2</sup>-Net driven realtime camera app and "is able to detect relevant object from the scene and clip them to apply fancy filters".
|
99 |
+
|
100 |
+

|
101 |
+
|
102 |
+
**(2021-Mar-17)** [**Dennis Bappert**](https://github.com/dennisbappert) re-trained the U<sup>2</sup>-Net model for [**human portrait matting**](https://github.com/dennisbappert/u-2-net-portrait). The results look very promising and he also provided the details of the training process and data generation(and augmentation) strategy, which are inspiring.
|
103 |
+
|
104 |
+
**(2021-Mar-11)** Dr. Tim developed a [**video version rembg**](https://github.com/ecsplendid/rembg-greenscreen) for removing video backgrounds using U<sup>2</sup>-Net. The awesome demo results can be found on [**YouTube**](https://www.youtube.com/watch?v=4NjqR2vCV_k).
|
105 |
+
|
106 |
+
**(2021-Mar-02)** We found some other interesting applications of our U<sup>2</sup>-Net including [**MOJO CUT**](https://play.google.com/store/apps/details?id=com.innoria.magicut&hl=en_CA&gl=US), [**Real-Time Background Removal on Iphone**](https://www.linkedin.com/feed/update/urn:li:activity:6752303661705170944/?updateEntityUrn=urn%3Ali%3Afs_feedUpdate%3A%28V2%2Curn%3Ali%3Aactivity%3A6752303661705170944%29), [**Video Background Removal**](https://nisargkapkar.hashnode.dev/image-and-video-background-removal-using-deep-learning), [**Another Online Portrait Generation Demo on AWS**](http://s3-website-hosting-u2net.s3-website-eu-west-1.amazonaws.com/), [**AI Scissor**](https://qooba.net/2020/09/11/ai-scissors-sharp-cut-with-neural-networks/).
|
107 |
+
|
108 |
+
**(2021-Feb-15)** We just released an online demo [**http://profu.ai**](http://profu.ai) for the portrait generation. Please feel free to give it a try and provide any suggestions or comments. <br/>
|
109 |
+
 <br/>
|
110 |
+
|
111 |
+
**(2021-Feb-06)** Recently, some people asked about using U<sup>2</sup>-Net for human segmentation, so we trained another example model for human segmentation based on the [**Supervisely Person Dataset**](https://supervise.ly/explore/projects/supervisely-person-dataset-23304/datasets). <br/>
|
112 |
+
|
113 |
+
(1) To run the human segmentation model, please first download the [**u2net_human_seg.pth**](https://drive.google.com/file/d/1m_Kgs91b21gayc2XLW0ou8yugAIadWVP/view?usp=sharing) model weights into ``` ./saved_models/u2net_human_seg/```. <br/>
|
114 |
+
(2) Prepare the to-be-segmented images into the corresponding directory, e.g. ```./test_data/test_human_images/```. <br/>
|
115 |
+
(3) Run the inference by command: ```python u2net_human_seg_test.py``` and the results will be output into the corresponding directory, e.g. ```./test_data/u2net_test_human_images_results/```<br/>
|
116 |
+
[**Notes: Due to the labeling accuracy of the Supervisely Person Dataset, the human segmentation model (u2net_human_seg.pth) here won't give you hair-level accuracy. But it should be more robust than u2net trained with DUTS-TR dataset on general human segmentation task. It can be used for human portrait segmentation, human body segmentation, etc.**](https://github.com/NathanUA/U-2-Net)<br/>
|
117 |
+
|
118 |
+
 <br/>
|
119 |
+

|
120 |
+

|
121 |
+
|
122 |
+
**(2020-Dec-28)** Some interesting applications and useful tools based on U<sup>2</sup>-Net: <br/>
|
123 |
+
(1) [**Xiaolong Liu**](https://github.com/LiuXiaolong19920720) developed several very interesting applications based on U<sup>2</sup>-Net including [**Human Portrait Drawing**](https://www.cvpy.net/studio/cv/func/DeepLearning/sketch/sketch/page/)(As far as I know, Xiaolong is the first one who uses U<sup>2</sup>-Net for portrait generation), [**image matting**](https://www.cvpy.net/studio/cv/func/DeepLearning/matting/matting/page/) and [**so on**](https://www.cvpy.net/). <br/>
|
124 |
+
(2) [**Vladimir Seregin**](https://github.com/peko/nn-lineart) developed an interesting tool, [**NN based lineart**](https://peko.github.io/nn-lineart/), for comparing the portrait results of U<sup>2</sup>-Net and that of another popular model, [**ArtLine**](https://github.com/vijishmadhavan/ArtLine), developed by [**Vijish Madhavan**](https://github.com/vijishmadhavan). <br/>
|
125 |
+
(3) [**Daniel Gatis**](https://github.com/danielgatis/rembg) built a python tool, [**Rembg**](https://pypi.org/project/rembg/), for image backgrounds removal based on U<sup>2</sup>-Net. I think this tool will greatly facilitate the application of U<sup>2</sup>-Net in different fields. <br/>
|
126 |
+

|
127 |
+
|
128 |
+
**(2020-Nov-21)** Recently, we found an interesting application of U<sup>2</sup>-Net for [**human portrait drawing**](https://www.pythonf.cn/read/141098). Therefore, we trained another model for this task based on the [**APDrawingGAN dataset**](https://github.com/yiranran/APDrawingGAN).
|
129 |
+
|
130 |
+

|
131 |
+
|
132 |
+

|
133 |
+
|
134 |
+

|
135 |
+
|
136 |
+
### Usage for portrait generation
|
137 |
+
1. Clone this repo to local
|
138 |
+
```
|
139 |
+
git clone https://github.com/NathanUA/U-2-Net.git
|
140 |
+
```
|
141 |
+
|
142 |
+
2. Download the u2net_portrait.pth from [**GoogleDrive**](https://drive.google.com/file/d/1IG3HdpcRiDoWNookbncQjeaPN28t90yW/view?usp=sharing) or [**Baidu Pan(提取码:chgd)**](https://pan.baidu.com/s/1BYT5Ts6BxwpB8_l2sAyCkw)model and put it into the directory: ```./saved_models/u2net_portrait/```.
|
143 |
+
|
144 |
+
3. Run on the testing set. <br/>
|
145 |
+
(1) Download the train and test set from [**APDrawingGAN**](https://github.com/yiranran/APDrawingGAN). These images and their ground truth are stitched side-by-side (512x1024). You need to split each of these images into two 512x512 images and put them into ```./test_data/test_portrait_images/portrait_im/```. You can also download the split testing set on [GoogleDrive](https://drive.google.com/file/d/1NkTsDDN8VO-JVik6VxXyV-3l2eo29KCk/view?usp=sharing). <br/>
|
146 |
+
(2) Running the inference with the command ```python u2net_portrait_test.py``` will output the results into ```./test_data/test_portrait_images/portrait_results```. <br/>
|
147 |
+
|
148 |
+
4. Run on your own dataset. <br/>
|
149 |
+
(1) Prepare your images and put them into ```./test_data/test_portrait_images/your_portrait_im/```. [**To obtain enough details of the portrait, the human head region in the input image should be close to or larger than 512x512. The head background should be relatively clear.**](https://github.com/NathanUA/U-2-Net) <br/>
|
150 |
+
(2) Running the prediction with the command ```python u2net_portrait_demo.py``` will output the results to ```./test_data/test_portrait_images/your_portrait_results/```. <br/>
|
151 |
+
(3) The difference between ```python u2net_portrait_demo.py``` and ```python u2net_portrait_test.py``` is that we added a simple [**face detection**](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_objdetect/py_face_detection/py_face_detection.html) step before the portrait generation in ```u2net_portrait_demo.py```. This is because the testing set of APDrawingGAN is normalized and cropped to 512x512 to include only human heads, while our own dataset may vary in resolution and content. Therefore, the code ```python u2net_portrait_demo.py``` will detect the biggest face in the given image and then crop, pad and resize the ROI to 512x512 for feeding to the network. The following figure shows how to take your own photos for generating high quality portraits.
|
152 |
+
|
153 |
+
**(2020-Sep-13)** Our U<sup>2</sup>-Net based model is the **6th** in [**MICCAI 2020 Thyroid Nodule Segmentation Challenge**](https://tn-scui2020.grand-challenge.org/Resultannouncement/).
|
154 |
+
|
155 |
+
**(2020-May-18)** The official paper of our **U<sup>2</sup>-Net (U square net)** ([**PDF in elsevier**(free until July 5 2020)](https://www.sciencedirect.com/science/article/pii/S0031320320302077?dgcid=author), [**PDF in arxiv**](http://arxiv.org/abs/2005.09007)) is now available. If you are not able to access that, please feel free to drop me an email.
|
156 |
+
|
157 |
+
**(2020-May-16)** We fixed the upsampling issue of the network. Now, the model should be able to handle **arbitrary input size**. (Tips: This modification is to facilitate the retraining of U<sup>2</sup>-Net on your own datasets. When using our pre-trained model on SOD datasets, please keep the input size as 320x320 to guarantee the performance.)
|
158 |
+
|
159 |
+
**(2020-May-16)** We highly appreciate **Cyril Diagne** for building this fantastic AR project: [**AR Copy and Paste**](https://github.com/cyrildiagne/ar-cutpaste) using our **U<sup>2</sup>-Net** (Qin *et al*, PR 2020) and [**BASNet**](https://github.com/NathanUA/BASNet)(Qin *et al*, CVPR 2019). The [**demo video**](https://twitter.com/cyrildiagne/status/1256916982764646402) in twitter has achieved over **5M** views, which is phenomenal and shows us more application possibilities of SOD.
|
160 |
+
|
161 |
+
## U<sup>2</sup>-Net Results (176.3 MB)
|
162 |
+
|
163 |
+

|
164 |
+
|
165 |
+
|
166 |
+
## Our previous work: [BASNet (CVPR 2019)](https://github.com/NathanUA/BASNet)
|
167 |
+
|
168 |
+
## Required libraries
|
169 |
+
|
170 |
+
Python 3.6
|
171 |
+
numpy 1.15.2
|
172 |
+
scikit-image 0.14.0
|
173 |
+
python-opencv
|
174 |
+
PIL 5.2.0
|
175 |
+
PyTorch 0.4.0
|
176 |
+
torchvision 0.2.1
|
177 |
+
glob
|
178 |
+
|
179 |
+
## Usage for salient object detection
|
180 |
+
1. Clone this repo
|
181 |
+
```
|
182 |
+
git clone https://github.com/NathanUA/U-2-Net.git
|
183 |
+
```
|
184 |
+
2. Download the pre-trained model u2net.pth (176.3 MB) from [**GoogleDrive**](https://drive.google.com/file/d/1ao1ovG1Qtx4b7EoskHXmi2E9rp5CHLcZ/view?usp=sharing) or [**Baidu Pan 提取码: pf9k**](https://pan.baidu.com/s/1WjwyEwDiaUjBbx_QxcXBwQ) or u2netp.pth (4.7 MB) from [**GoogleDrive**](https://drive.google.com/file/d/1rbSTGKAE-MTxBYHd-51l2hMOQPT_7EPy/view?usp=sharing) or [**Baidu Pan 提取码: 8xsi**](https://pan.baidu.com/s/10tW12OlecRpE696z8FxdNQ) and put it into the directory './saved_models/u2net/' or './saved_models/u2netp/', respectively.
|
185 |
+
|
186 |
+
3. Cd to the directory 'U-2-Net', run the train or inference process by command: ```python u2net_train.py```
|
187 |
+
or ```python u2net_test.py``` respectively. The 'model_name' in both files can be changed to 'u2net' or 'u2netp' for using different models.
|
188 |
+
|
189 |
+
We also provide the predicted saliency maps ([u2net results](https://drive.google.com/file/d/1mZFWlS4WygWh1eVI8vK2Ad9LrPq4Hp5v/view?usp=sharing),[u2netp results](https://drive.google.com/file/d/1j2pU7vyhOO30C2S_FJuRdmAmMt3-xmjD/view?usp=sharing)) for datasets SOD, ECSSD, DUT-OMRON, PASCAL-S, HKU-IS and DUTS-TE.
|
190 |
+
|
191 |
+
|
192 |
+
## U<sup>2</sup>-Net Architecture
|
193 |
+
|
194 |
+

|
195 |
+
|
196 |
+
|
197 |
+
## Quantitative Comparison
|
198 |
+
|
199 |
+

|
200 |
+
|
201 |
+

|
202 |
+
|
203 |
+
|
204 |
+
## Qualitative Comparison
|
205 |
+
|
206 |
+

|
207 |
+
|
208 |
+
## Citation
|
209 |
+
```
|
210 |
+
@InProceedings{Qin_2020_PR,
|
211 |
+
title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection},
|
212 |
+
author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin},
|
213 |
+
journal = {Pattern Recognition},
|
214 |
+
volume = {106},
|
215 |
+
pages = {107404},
|
216 |
+
year = {2020}
|
217 |
+
}
|
218 |
+
```
|
clipping_camera.jpg
ADDED
![]() |
Git LFS Details
|
data_loader.py
ADDED
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# data loader
|
2 |
+
from __future__ import print_function, division
|
3 |
+
import glob
|
4 |
+
import torch
|
5 |
+
from skimage import io, transform, color
|
6 |
+
import numpy as np
|
7 |
+
import random
|
8 |
+
import math
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
from torch.utils.data import Dataset, DataLoader
|
11 |
+
from torchvision import transforms, utils
|
12 |
+
from PIL import Image
|
13 |
+
|
14 |
+
#==========================dataset load==========================
|
15 |
+
class RescaleT(object):
    """Resize the image and label of a sample to one fixed target size.

    ``output_size`` is either an int (the output is a square of that side) or
    a ``(height, width)`` tuple. The input aspect ratio is not preserved.
    """

    def __init__(self,output_size):
        assert isinstance(output_size,(int,tuple))
        self.output_size = output_size

    def _target_shape(self):
        """Return the ``(height, width)`` both arrays are resized to."""
        if isinstance(self.output_size, int):
            return (self.output_size, self.output_size)
        # A tuple used to be forwarded as ((h, w), (h, w)), which is not a
        # valid output shape; unpack it instead.
        height, width = self.output_size
        return (int(height), int(width))

    def __call__(self,sample):
        """Return a new sample dict with 'image' and 'label' resized."""
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        target = self._target_shape()

        img = transform.resize(image, target, mode='constant')
        # order=0 with preserve_range=True keeps the label's original values
        # instead of interpolating or rescaling them.
        lbl = transform.resize(label, target, mode='constant', order=0, preserve_range=True)

        return {'imidx':imidx, 'image':img,'label':lbl}
|
44 |
+
|
45 |
+
class Rescale(object):
    """Randomly flip a sample upside-down, then resize it.

    With an int ``output_size`` the shorter side becomes ``output_size`` and
    the longer side is scaled by the same factor; with a tuple the output is
    exactly ``(height, width)``.
    """

    def __init__(self,output_size):
        assert isinstance(output_size,(int,tuple))
        self.output_size = output_size

    def __call__(self,sample):
        """Return a new sample dict with the flipped/resized image and label."""
        imidx = sample['imidx']
        image = sample['image']
        label = sample['label']

        # Reverse the row order of both arrays for about half of the samples.
        if random.random() >= 0.5:
            image, label = image[::-1], label[::-1]

        rows, cols = image.shape[:2]
        size = self.output_size

        if not isinstance(size, int):
            out_rows, out_cols = size
        elif rows > cols:
            out_rows, out_cols = size * rows / cols, size
        else:
            out_rows, out_cols = size, size * cols / rows

        target = (int(out_rows), int(out_cols))

        img = transform.resize(image, target, mode='constant')
        # Nearest-neighbour resize that keeps the label's value range.
        lbl = transform.resize(label, target, mode='constant', order=0, preserve_range=True)

        return {'imidx':imidx, 'image':img,'label':lbl}
|
75 |
+
|
76 |
+
class RandomCrop(object):
    """Randomly flip a sample upside-down and cut a random window out of it.

    ``output_size`` is the crop size: an int for a square crop or a
    ``(height, width)`` tuple.
    """

    def __init__(self,output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self,sample):
        """Return a new sample dict holding the same window of image and label.

        Raises:
            ValueError: if the crop is larger than the image.
        """
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        # Reverse the row order of both arrays for about half of the samples.
        if random.random() >= 0.5:
            image = image[::-1]
            label = label[::-1]

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        if new_h > h or new_w > w:
            raise ValueError(
                "crop size {} is larger than the image size {}".format((new_h, new_w), (h, w)))

        # randint's upper bound is exclusive: the +1 makes the last valid
        # offset reachable and keeps an exactly crop-sized image from raising.
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        image = image[top: top + new_h, left: left + new_w]
        label = label[top: top + new_h, left: left + new_w]

        return {'imidx':imidx,'image':image, 'label':label}
|
102 |
+
|
103 |
+
class ToTensor(object):
    """Convert ndarrays in sample to Tensors.

    The image is divided by its maximum, normalised with fixed per-channel
    mean/std constants and moved to channel-first layout. The label is divided
    by its maximum unless that maximum is below 1e-6.
    """

    # Per-channel normalisation constants applied after scaling by the maximum.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __call__(self, sample):
        """Return a dict of tensors built from 'imidx', 'image' and 'label'."""
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        tmpImg = np.zeros((image.shape[0],image.shape[1],3))

        # An all-zero image used to be divided by zero and turned into NaNs.
        peak = np.max(image)
        if peak != 0:
            image = image/peak

        label_peak = np.max(label)
        if not (label_peak < 1e-6):
            label = label/label_peak

        if image.shape[2]==1:
            # Single-channel input: replicate it with the first channel's constants.
            for channel in range(3):
                tmpImg[:,:,channel] = (image[:,:,0]-self._MEAN[0])/self._STD[0]
        else:
            for channel in range(3):
                tmpImg[:,:,channel] = (image[:,:,channel]-self._MEAN[channel])/self._STD[channel]

        tmpImg = tmpImg.transpose((2, 0, 1))
        # A flipped label (label[::-1]) that skipped the division above still
        # has negative strides, which torch.from_numpy rejects; copy it.
        tmpLbl = np.ascontiguousarray(label.transpose((2, 0, 1)))

        return {'imidx':torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}
|
135 |
+
|
136 |
+
class ToTensorLab(object):
    """Convert ndarrays in sample to Tensors, optionally using Lab colours.

    ``flag`` selects the image representation:
      * 0 (default): RGB divided by the image maximum, then normalised with
        fixed per-channel mean/std constants;
      * 1: Lab, each channel min-max scaled and then standardised;
      * 2: RGB and Lab stacked into 6 channels, each channel min-max scaled
        and then standardised.
    The label is divided by its maximum unless that maximum is below 1e-6.
    """

    # Per-channel normalisation constants used by the RGB (flag 0) path.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __init__(self,flag=0):
        self.flag = flag

    @staticmethod
    def _as_three_channels(image):
        """Replicate a single-channel image to 3 channels; pass others through."""
        if image.shape[2]==1:
            rgb = np.zeros((image.shape[0],image.shape[1],3))
            for channel in range(3):
                rgb[:,:,channel] = image[:,:,0]
            return rgb
        return image

    @staticmethod
    def _min_max(channel):
        """Scale a channel to [0, 1]; a constant channel maps to zeros."""
        low = np.min(channel)
        span = np.max(channel)-low
        if span == 0:
            # Avoid the 0/0 that used to fill constant channels with NaN.
            return np.zeros(channel.shape)
        return (channel-low)/span

    @staticmethod
    def _standardize(channel):
        """Subtract the mean and divide by the std (skipped when the std is 0)."""
        centered = channel-np.mean(channel)
        spread = np.std(channel)
        if spread == 0:
            return centered
        return centered/spread

    def __call__(self, sample):
        """Return a dict of tensors built from 'imidx', 'image' and 'label'."""
        imidx, image, label =sample['imidx'], sample['image'], sample['label']

        label_peak = np.max(label)
        if not (label_peak < 1e-6):
            label = label/label_peak

        # change the color space
        if self.flag == 2: # with rgb and Lab colors
            rgb = self._as_three_channels(image)
            lab = color.rgb2lab(rgb)

            tmpImg = np.zeros((image.shape[0],image.shape[1],6))
            # normalize each channel to range [0,1]
            for channel in range(3):
                tmpImg[:,:,channel] = self._min_max(rgb[:,:,channel])
                tmpImg[:,:,channel+3] = self._min_max(lab[:,:,channel])
            for channel in range(6):
                tmpImg[:,:,channel] = self._standardize(tmpImg[:,:,channel])

        elif self.flag == 1: #with Lab color
            tmpImg = color.rgb2lab(self._as_three_channels(image))
            for channel in range(3):
                tmpImg[:,:,channel] = self._min_max(tmpImg[:,:,channel])
            for channel in range(3):
                tmpImg[:,:,channel] = self._standardize(tmpImg[:,:,channel])

        else: # with rgb color
            tmpImg = np.zeros((image.shape[0],image.shape[1],3))
            # An all-zero image used to be divided by zero and turned into NaNs.
            peak = np.max(image)
            if peak != 0:
                image = image/peak
            if image.shape[2]==1:
                for channel in range(3):
                    tmpImg[:,:,channel] = (image[:,:,0]-self._MEAN[0])/self._STD[0]
            else:
                for channel in range(3):
                    tmpImg[:,:,channel] = (image[:,:,channel]-self._MEAN[channel])/self._STD[channel]

        tmpImg = tmpImg.transpose((2, 0, 1))
        # A flipped label (label[::-1]) that skipped the division above still
        # has negative strides, which torch.from_numpy rejects; copy it.
        tmpLbl = np.ascontiguousarray(label.transpose((2, 0, 1)))

        return {'imidx':torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}
|
222 |
+
|
223 |
+
class SalObjDataset(Dataset):
    """Dataset of image files with optional label (mask) files.

    Args:
        img_name_list: paths of the input images.
        lbl_name_list: paths of the label images, index-aligned with
            ``img_name_list``; an empty list yields all-zero labels.
        transform: optional callable applied to each sample dict.
    """

    def __init__(self,img_name_list,lbl_name_list,transform=None):
        self.image_name_list = img_name_list
        self.label_name_list = lbl_name_list
        self.transform = transform

    def __len__(self):
        return len(self.image_name_list)

    def __getitem__(self,idx):
        """Load sample ``idx`` as a dict with 'imidx', 'image' and 'label'."""
        image = io.imread(self.image_name_list[idx])
        imidx = np.array([idx])

        if(0==len(self.label_name_list)):
            # No labels supplied: use an all-zero label shaped like the image.
            label_3 = np.zeros(image.shape)
        else:
            label_3 = io.imread(self.label_name_list[idx])

        # Reduce the label to a single 2-D plane (first channel if it has several).
        if(3==len(label_3.shape)):
            label = label_3[:,:,0]
        elif(2==len(label_3.shape)):
            label = label_3
        else:
            label = np.zeros(label_3.shape[0:2])

        # Give image and label an explicit trailing channel axis.
        if(3==len(image.shape) and 2==len(label.shape)):
            label = label[:,:,np.newaxis]
        elif(2==len(image.shape) and 2==len(label.shape)):
            image = image[:,:,np.newaxis]
            label = label[:,:,np.newaxis]

        sample = {'imidx':imidx, 'image':image, 'label':label}

        if self.transform:
            sample = self.transform(sample)

        return sample
|
download_from_hf.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
HuggingFace model download utility for U2NET MVTec LOCO segmentation
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
def download_u2net_model(model_path="./saved_models/u2net/u2net.pth",
                        repo_id="zhiqing0205/u2net-mvtec-loco-segmentation",
                        force_download=False):
    """
    Download U2NET model from HuggingFace Hub

    Args:
        model_path: Local path to save the model
        repo_id: HuggingFace repository ID
        force_download: Force re-download even if file exists

    Returns:
        Path of the local model file, or None if the download failed.
    """
    import shutil

    # Check if model already exists
    if os.path.exists(model_path) and not force_download:
        print(f"Model already exists at {model_path}")
        return model_path

    print(f"Downloading U2NET model from HuggingFace: {repo_id}")

    try:
        # Ensure directory exists (a bare filename has no directory part,
        # and os.makedirs("") would raise).
        target_dir = os.path.dirname(model_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Download specific model file; forward force_download so a stale
        # cached copy is really replaced.
        downloaded_path = hf_hub_download(
            repo_id=repo_id,
            filename="saved_models/u2net/u2net.pth",
            local_dir=".",
            local_dir_use_symlinks=False,
            force_download=force_download
        )

        # The hub file lands under ./saved_models/...; honour a custom
        # model_path by copying it there so later existence checks succeed.
        if os.path.abspath(downloaded_path) != os.path.abspath(model_path):
            shutil.copy2(downloaded_path, model_path)
            downloaded_path = model_path

        print(f"Model downloaded successfully to: {downloaded_path}")
        return downloaded_path

    except Exception as e:
        # Best-effort helper: report the problem and point to the manual download.
        print(f"Error downloading model: {e}")
        print("Please download manually from:")
        print("https://drive.google.com/file/d/1ao1ovG1Qtx4b7EoskHXmi2E9rp5CHLcZ/view")
        return None
|
49 |
+
|
50 |
+
def download_complete_repo(local_dir="./u2net-mvtec-loco",
                          repo_id="zhiqing0205/u2net-mvtec-loco-segmentation"):
    """
    Fetch every file of a HuggingFace Hub repository into a local directory.

    Args:
        local_dir: Local directory to save the repo
        repo_id: HuggingFace repository ID

    Returns:
        ``local_dir`` on success, None if the download raised.
    """
    print(f"Downloading complete repository: {repo_id}")

    request = dict(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False)

    try:
        snapshot_download(**request)
        print(f"Repository downloaded successfully to: {local_dir}")
        outcome = local_dir
    except Exception as exc:
        # Best-effort: report the failure instead of propagating it.
        print(f"Error downloading repository: {exc}")
        outcome = None

    return outcome
|
76 |
+
|
77 |
+
if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Download U2NET model or complete repo from HuggingFace")
    # --model-only is the default behaviour; the flag is accepted but not
    # consulted below.
    parser.add_argument("--model-only", action="store_true",
                        help="Download only the model file")
    parser.add_argument("--complete-repo", action="store_true",
                        help="Download the complete repository")
    parser.add_argument("--repo-id", type=str,
                        default="zhiqing0205/u2net-mvtec-loco-segmentation",
                        help="HuggingFace repository ID")
    parser.add_argument("--force", action="store_true",
                        help="Force download even if files exist")

    args = parser.parse_args()

    if args.complete_repo:
        result = download_complete_repo(repo_id=args.repo_id)
    else:
        result = download_u2net_model(repo_id=args.repo_id, force_download=args.force)

    # Both helpers return None after printing an error; surface that as a
    # non-zero exit status so shells and CI can detect the failure.
    sys.exit(0 if result is not None else 1)
|
fg_mask_mvtec_loco.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46d1b9829d323c7388a42841fb7f7759dd82027714697129b22ade369b98c714
|
3 |
+
size 27964370
|
figures/3d-photo-re.jpg
ADDED
![]() |
Git LFS Details
|
figures/LensOCR.jpg
ADDED
![]() |
Git LFS Details
|
figures/U2NETPR.png
ADDED
![]() |
Git LFS Details
|
figures/U2Net_Logo.png
ADDED
![]() |
Git LFS Details
|
figures/art_transfer.JPG
ADDED
|
Git LFS Details
|
figures/bg-removal.gif
ADDED
![]() |
Git LFS Details
|
figures/bg-rm-aug.gif
ADDED
![]() |
Git LFS Details
|
figures/clipping_camera.jpg
ADDED
![]() |
Git LFS Details
|
figures/close-seg.jpg
ADDED
![]() |
Git LFS Details
|
figures/gradio_web_demo.jpg
ADDED
![]() |
Git LFS Details
|
figures/hotpot.png
ADDED
![]() |
Git LFS Details
|
figures/human_seg.png
ADDED
![]() |
Git LFS Details
|
figures/human_seg_results.gif
ADDED
![]() |
Git LFS Details
|
figures/human_seg_video.gif
ADDED
![]() |
Git LFS Details
|
figures/im_composite.jpg
ADDED
![]() |
Git LFS Details
|
figures/lensto.png
ADDED
![]() |
Git LFS Details
|
figures/motor-demo.gif
ADDED
![]() |
Git LFS Details
|
figures/pixelmator.jpg
ADDED
![]() |
Git LFS Details
|
figures/portrait-ios-app.jpg
ADDED
![]() |
Git LFS Details
|
figures/portrait_kids.png
ADDED
![]() |
Git LFS Details
|
figures/portrait_ladies.png
ADDED
![]() |
Git LFS Details
|
figures/portrait_men.png
ADDED
![]() |
Git LFS Details
|
figures/profuai.png
ADDED
![]() |
Git LFS Details
|
figures/qual.png
ADDED
![]() |
Git LFS Details
|
figures/quan_1.png
ADDED
![]() |
Git LFS Details
|
figures/quan_2.png
ADDED
![]() |
Git LFS Details
|
figures/rembg.png
ADDED
![]() |
Git LFS Details
|
figures/rm_bg.JPG
ADDED
|
Git LFS Details
|
figures/ship-demo.gif
ADDED
![]() |
Git LFS Details
|
figures/silueta.png
ADDED
![]() |
Git LFS Details
|
figures/sky-seg.png
ADDED
![]() |
Git LFS Details
|
figures/style-trans.JPG
ADDED
|
Git LFS Details
|
figures/swift-u2net.jpeg
ADDED
![]() |
Git LFS Details
|
figures/u2net-best-paper.jpg
ADDED
![]() |
Git LFS Details
|
figures/u2netqual.png
ADDED
![]() |
Git LFS Details
|
figures/view-move.gif
ADDED
![]() |
Git LFS Details
|
figures/xuebin-demo.png
ADDED
![]() |
Git LFS Details
|
gradio/demo.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import paddlehub as hub
|
3 |
+
import gradio as gr
|
4 |
+
import torch
|
5 |
+
|
6 |
+
# Example images: fetched into the working directory when the script starts.
for _url, _filename in (
    ('https://cdn.pixabay.com/photo/2018/08/12/16/59/ara-3601194_1280.jpg', 'parrot.jpg'),
    ('https://cdn.pixabay.com/photo/2016/10/21/14/46/fox-1758183_1280.jpg', 'fox.jpg'),
):
    torch.hub.download_url_to_file(_url, _filename)

# PaddleHub module that infer() calls for segmentation.
model = hub.Module(name='U2Net')
|
11 |
+
|
12 |
+
def infer(img):
    """Segment the uploaded file and return ``(front, mask)`` for display.

    ``img`` is the uploaded file object; its ``name`` attribute is read as the
    path handed to OpenCV. The 'front' result is returned with its last axis
    reversed (presumably BGR -> RGB for display; confirm against the module's
    output format).
    """
    frame = cv2.imread(img.name)
    results = model.Segmentation(
        images=[frame],
        paths=None,
        batch_size=1,
        input_size=320,
        output_dir='output',
        visualization=True)
    first = results[0]
    front = first['front'][:,:,::-1]
    return front, first['mask']
|
21 |
+
|
22 |
+
# Gradio widgets: one uploaded file in, two numpy images out.
inputs = gr.inputs.Image(type='file', label="Original Image")
outputs = [gr.outputs.Image(type="numpy", label=caption) for caption in ("Front", "Mask")]

# Text shown around the demo.
title = "U^2-Net"
description = "demo for U^2-Net. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2005.09007'>U^2-Net: Going Deeper with Nested U-Structure for Salient Object Detection</a> | <a href='https://github.com/xuebinqin/U-2-Net'>Github Repo</a></p>"

# Clickable examples; the files are downloaded earlier in this script.
examples = [[name] for name in ('fox.jpg', 'parrot.jpg')]

gr.Interface(
    fn=infer,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch()
|
model/__init__.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from .u2net import U2NET
|
2 |
+
from .u2net import U2NETP
|