Spaces:
Running
Running
Commit
·
b7eedf7
1
Parent(s):
9480700
add thirdparty
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +4 -0
- .gitignore +73 -0
- app.py +1 -1
- thirdparty/DROID-SLAM/.gitignore +158 -0
- thirdparty/DROID-SLAM/.gitmodules +6 -0
- thirdparty/DROID-SLAM/LICENSE +29 -0
- thirdparty/DROID-SLAM/README.md +139 -0
- thirdparty/DROID-SLAM/demo.py +135 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/__init__.py +1 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/augmentation.py +58 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/base.py +157 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/factory.py +82 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/rgbd_utils.py +190 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/stream.py +234 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/tartan.py +138 -0
- thirdparty/DROID-SLAM/droid_slam/data_readers/tartan_test.txt +32 -0
- thirdparty/DROID-SLAM/droid_slam/depth_video.py +197 -0
- thirdparty/DROID-SLAM/droid_slam/droid.py +102 -0
- thirdparty/DROID-SLAM/droid_slam/droid_backend.py +52 -0
- thirdparty/DROID-SLAM/droid_slam/droid_frontend.py +119 -0
- thirdparty/DROID-SLAM/droid_slam/droid_net.py +226 -0
- thirdparty/DROID-SLAM/droid_slam/factor_graph.py +397 -0
- thirdparty/DROID-SLAM/droid_slam/geom/__init__.py +0 -0
- thirdparty/DROID-SLAM/droid_slam/geom/ba.py +158 -0
- thirdparty/DROID-SLAM/droid_slam/geom/chol.py +73 -0
- thirdparty/DROID-SLAM/droid_slam/geom/graph_utils.py +113 -0
- thirdparty/DROID-SLAM/droid_slam/geom/losses.py +118 -0
- thirdparty/DROID-SLAM/droid_slam/geom/projective_ops.py +139 -0
- thirdparty/DROID-SLAM/droid_slam/logger.py +54 -0
- thirdparty/DROID-SLAM/droid_slam/modules/__init__.py +0 -0
- thirdparty/DROID-SLAM/droid_slam/modules/clipping.py +24 -0
- thirdparty/DROID-SLAM/droid_slam/modules/corr.py +140 -0
- thirdparty/DROID-SLAM/droid_slam/modules/extractor.py +198 -0
- thirdparty/DROID-SLAM/droid_slam/modules/gru.py +34 -0
- thirdparty/DROID-SLAM/droid_slam/motion_filter.py +92 -0
- thirdparty/DROID-SLAM/droid_slam/trajectory_filler.py +112 -0
- thirdparty/DROID-SLAM/droid_slam/vis_headless.py +185 -0
- thirdparty/DROID-SLAM/droid_slam/visualization.py +189 -0
- thirdparty/DROID-SLAM/environment.yaml +22 -0
- thirdparty/DROID-SLAM/environment_novis.yaml +20 -0
- thirdparty/DROID-SLAM/evaluation_scripts/test_eth3d.py +134 -0
- thirdparty/DROID-SLAM/evaluation_scripts/test_euroc.py +142 -0
- thirdparty/DROID-SLAM/evaluation_scripts/test_tum.py +123 -0
- thirdparty/DROID-SLAM/evaluation_scripts/validate_tartanair.py +115 -0
- thirdparty/DROID-SLAM/misc/DROID.png +3 -0
- thirdparty/DROID-SLAM/misc/renderoption.json +40 -0
- thirdparty/DROID-SLAM/misc/screenshot.png +3 -0
- thirdparty/DROID-SLAM/setup.py +61 -0
- thirdparty/DROID-SLAM/src/altcorr_kernel.cu +356 -0
- thirdparty/DROID-SLAM/src/correlation_kernels.cu +185 -0
.gitattributes
CHANGED
@@ -35,3 +35,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
37 |
*.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
37 |
*.png filter=lfs diff=lfs merge=lfs -text
|
38 |
+
thirdparty/Metric3D/media/gifs/demo_1.gif filter=lfs diff=lfs merge=lfs -text
|
39 |
+
thirdparty/Metric3D/training/kitti_json_files/eigen_train.json filter=lfs diff=lfs merge=lfs -text
|
40 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
41 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Project specific data and submodule
|
2 |
+
|
3 |
+
/example/video_0
|
4 |
+
/weights
|
5 |
+
|
6 |
+
.vscode/
|
7 |
+
**/.DS_Store
|
8 |
+
data/pretrain/*.pth
|
9 |
+
data/pretrain/*.pth.tar
|
10 |
+
data/smpl/SMPL_*.pkl
|
11 |
+
*.mov
|
12 |
+
example_video/
|
13 |
+
/thirdparty/detection
|
14 |
+
experiments/
|
15 |
+
logs/
|
16 |
+
hot3d_*/
|
17 |
+
File/
|
18 |
+
thirdparty/ZoeDepth
|
19 |
+
eval_vis_mdslam/
|
20 |
+
eval_vis_*/
|
21 |
+
pred_vis/
|
22 |
+
_DATA.zip
|
23 |
+
train_ddp_process*
|
24 |
+
logs*
|
25 |
+
/*_trainset_export/
|
26 |
+
/*.png
|
27 |
+
/*.zip
|
28 |
+
/dataset_tars/
|
29 |
+
/dataset_untars/
|
30 |
+
/datasets/
|
31 |
+
/eval_log*/
|
32 |
+
*.pth
|
33 |
+
*.pkl
|
34 |
+
/dataset*/
|
35 |
+
/eval*/
|
36 |
+
/thirdparty/aitviewer
|
37 |
+
|
38 |
+
# Byte-compiled / optimized / DLL files
|
39 |
+
__pycache__/
|
40 |
+
|
41 |
+
# Distribution / packaging
|
42 |
+
.Python
|
43 |
+
build/
|
44 |
+
develop-eggs/
|
45 |
+
dist/
|
46 |
+
downloads/
|
47 |
+
eggs/
|
48 |
+
.eggs/
|
49 |
+
lib64/
|
50 |
+
parts/
|
51 |
+
sdist/
|
52 |
+
var/
|
53 |
+
wheels/
|
54 |
+
pip-wheel-metadata/
|
55 |
+
share/python-wheels/
|
56 |
+
*.egg-info/
|
57 |
+
.installed.cfg
|
58 |
+
*.egg
|
59 |
+
MANIFEST
|
60 |
+
|
61 |
+
# Jupyter Notebook
|
62 |
+
.ipynb_checkpoints
|
63 |
+
*.ipynb
|
64 |
+
|
65 |
+
# IPython
|
66 |
+
profile_default/
|
67 |
+
ipython_config.py
|
68 |
+
|
69 |
+
# pyenv
|
70 |
+
.python-version
|
71 |
+
|
72 |
+
vis.mp4
|
73 |
+
imgui.ini
|
app.py
CHANGED
@@ -121,7 +121,7 @@ header = ('''
|
|
121 |
<a href="" target="_blank" rel="noopener noreferrer">Jinglei Zhang</a><sup>1</sup>,
|
122 |
<a href="https://jiankangdeng.github.io/" target="_blank" rel="noopener noreferrer">Jiankang Deng</a><sup>2</sup>,
|
123 |
<br>
|
124 |
-
<a href="https://scholar.google.com/citations?user=syoPhv8AAAAJ&hl=en" target="_blank" rel="noopener noreferrer">Chao Ma</a><sup>1</sup
|
125 |
<a href="https://rolpotamias.github.io" target="_blank" rel="noopener noreferrer">Rolandos Alexandros Potamias</a><sup>2</sup>
|
126 |
</h3>
|
127 |
<h3>
|
|
|
121 |
<a href="" target="_blank" rel="noopener noreferrer">Jinglei Zhang</a><sup>1</sup>,
|
122 |
<a href="https://jiankangdeng.github.io/" target="_blank" rel="noopener noreferrer">Jiankang Deng</a><sup>2</sup>,
|
123 |
<br>
|
124 |
+
<a href="https://scholar.google.com/citations?user=syoPhv8AAAAJ&hl=en" target="_blank" rel="noopener noreferrer">Chao Ma</a><sup>1</sup>,
|
125 |
<a href="https://rolpotamias.github.io" target="_blank" rel="noopener noreferrer">Rolandos Alexandros Potamias</a><sup>2</sup>
|
126 |
</h3>
|
127 |
<h3>
|
thirdparty/DROID-SLAM/.gitignore
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
98 |
+
__pypackages__/
|
99 |
+
|
100 |
+
# Celery stuff
|
101 |
+
celerybeat-schedule
|
102 |
+
celerybeat.pid
|
103 |
+
|
104 |
+
# SageMath parsed files
|
105 |
+
*.sage.py
|
106 |
+
|
107 |
+
# Environments
|
108 |
+
.env
|
109 |
+
.venv
|
110 |
+
env/
|
111 |
+
venv/
|
112 |
+
ENV/
|
113 |
+
env.bak/
|
114 |
+
venv.bak/
|
115 |
+
|
116 |
+
# Spyder project settings
|
117 |
+
.spyderproject
|
118 |
+
.spyproject
|
119 |
+
|
120 |
+
# Rope project settings
|
121 |
+
.ropeproject
|
122 |
+
|
123 |
+
# mkdocs documentation
|
124 |
+
/site
|
125 |
+
|
126 |
+
# mypy
|
127 |
+
.mypy_cache/
|
128 |
+
.dmypy.json
|
129 |
+
dmypy.json
|
130 |
+
|
131 |
+
# Pyre type checker
|
132 |
+
.pyre/
|
133 |
+
|
134 |
+
# pytype static type analyzer
|
135 |
+
.pytype/
|
136 |
+
|
137 |
+
# Cython debug symbols
|
138 |
+
cython_debug/
|
139 |
+
|
140 |
+
|
141 |
+
|
142 |
+
__pycache__
|
143 |
+
build
|
144 |
+
dist
|
145 |
+
*.egg-info
|
146 |
+
*.vscode/
|
147 |
+
*.pth
|
148 |
+
tests
|
149 |
+
checkpoints
|
150 |
+
datasets
|
151 |
+
runs
|
152 |
+
cache
|
153 |
+
*.out
|
154 |
+
*.o
|
155 |
+
data
|
156 |
+
figures/*.pdf
|
157 |
+
|
158 |
+
|
thirdparty/DROID-SLAM/.gitmodules
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[submodule "thirdparty/lietorch"]
|
2 |
+
path = thirdparty/lietorch
|
3 |
+
url = https://github.com/princeton-vl/lietorch
|
4 |
+
[submodule "thirdparty/eigen"]
|
5 |
+
path = thirdparty/eigen
|
6 |
+
url = https://gitlab.com/libeigen/eigen.git
|
thirdparty/DROID-SLAM/LICENSE
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
BSD 3-Clause License
|
2 |
+
|
3 |
+
Copyright (c) 2021, Princeton Vision & Learning Lab
|
4 |
+
All rights reserved.
|
5 |
+
|
6 |
+
Redistribution and use in source and binary forms, with or without
|
7 |
+
modification, are permitted provided that the following conditions are met:
|
8 |
+
|
9 |
+
1. Redistributions of source code must retain the above copyright notice, this
|
10 |
+
list of conditions and the following disclaimer.
|
11 |
+
|
12 |
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
13 |
+
this list of conditions and the following disclaimer in the documentation
|
14 |
+
and/or other materials provided with the distribution.
|
15 |
+
|
16 |
+
3. Neither the name of the copyright holder nor the names of its
|
17 |
+
contributors may be used to endorse or promote products derived from
|
18 |
+
this software without specific prior written permission.
|
19 |
+
|
20 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
21 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
22 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
23 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
24 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
25 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
27 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
28 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
thirdparty/DROID-SLAM/README.md
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# DROID-SLAM
|
2 |
+
|
3 |
+
|
4 |
+
<!-- <center><img src="misc/DROID.png" width="640" style="center"></center> -->
|
5 |
+
|
6 |
+
|
7 |
+
[![IMAGE ALT TEXT HERE](misc/screenshot.png)](https://www.youtube.com/watch?v=GG78CSlSHSA)
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
[DROID-SLAM: Deep Visual SLAM for Monocular, Stereo, and RGB-D Cameras](https://arxiv.org/abs/2108.10869)
|
12 |
+
Zachary Teed and Jia Deng
|
13 |
+
|
14 |
+
```
|
15 |
+
@article{teed2021droid,
|
16 |
+
title={{DROID-SLAM: Deep Visual SLAM for Monocular, Stereo, and RGB-D Cameras}},
|
17 |
+
author={Teed, Zachary and Deng, Jia},
|
18 |
+
journal={Advances in neural information processing systems},
|
19 |
+
year={2021}
|
20 |
+
}
|
21 |
+
```
|
22 |
+
|
23 |
+
**Initial Code Release:** This repo currently provides a single GPU implementation of our monocular, stereo, and RGB-D SLAM systems. It currently contains demos, training, and evaluation scripts.
|
24 |
+
|
25 |
+
|
26 |
+
## Requirements
|
27 |
+
|
28 |
+
To run the code you will need ...
|
29 |
+
* **Inference:** Running the demos will require a GPU with at least 11G of memory.
|
30 |
+
|
31 |
+
* **Training:** Training requires a GPU with at least 24G of memory. We train on 4 x RTX-3090 GPUs.
|
32 |
+
|
33 |
+
## Getting Started
|
34 |
+
1. Clone the repo using the `--recursive` flag
|
35 |
+
```Bash
|
36 |
+
git clone --recursive https://github.com/princeton-vl/DROID-SLAM.git
|
37 |
+
```
|
38 |
+
|
39 |
+
2. Create a new anaconda environment using the provided .yaml file. Use `environment_novis.yaml` if you do not want to use the visualization.
|
40 |
+
```Bash
|
41 |
+
conda env create -f environment.yaml
|
42 |
+
pip install evo --upgrade --no-binary evo
|
43 |
+
pip install gdown
|
44 |
+
```
|
45 |
+
|
46 |
+
3. Compile the extensions (takes about 10 minutes)
|
47 |
+
```Bash
|
48 |
+
python setup.py install
|
49 |
+
```
|
50 |
+
|
51 |
+
|
52 |
+
## Demos
|
53 |
+
|
54 |
+
1. Download the model from google drive: [droid.pth](https://drive.google.com/file/d/1PpqVt1H4maBa_GbPJp4NwxRsd9jk-elh/view?usp=sharing)
|
55 |
+
|
56 |
+
2. Download some sample videos using the provided script.
|
57 |
+
```Bash
|
58 |
+
./tools/download_sample_data.sh
|
59 |
+
```
|
60 |
+
|
61 |
+
Run the demo on any of the samples (all demos can be run on a GPU with 11G of memory). While running, press the "s" key to increase the filtering threshold (= more points) and "a" to decrease the filtering threshold (= fewer points). To save the reconstruction with full resolution depth maps use the `--reconstruction_path` flag.
|
62 |
+
|
63 |
+
|
64 |
+
```Python
|
65 |
+
python demo.py --imagedir=data/abandonedfactory --calib=calib/tartan.txt --stride=2
|
66 |
+
```
|
67 |
+
|
68 |
+
```Python
|
69 |
+
python demo.py --imagedir=data/sfm_bench/rgb --calib=calib/eth.txt
|
70 |
+
```
|
71 |
+
|
72 |
+
```Python
|
73 |
+
python demo.py --imagedir=data/Barn --calib=calib/barn.txt --stride=1 --backend_nms=4
|
74 |
+
```
|
75 |
+
|
76 |
+
```Python
|
77 |
+
python demo.py --imagedir=data/mav0/cam0/data --calib=calib/euroc.txt --t0=150
|
78 |
+
```
|
79 |
+
|
80 |
+
```Python
|
81 |
+
python demo.py --imagedir=data/rgbd_dataset_freiburg3_cabinet/rgb --calib=calib/tum3.txt
|
82 |
+
```
|
83 |
+
|
84 |
+
|
85 |
+
**Running on your own data:** All you need is a calibration file. Calibration files are in the form
|
86 |
+
```
|
87 |
+
fx fy cx cy [k1 k2 p1 p2 [ k3 [ k4 k5 k6 ]]]
|
88 |
+
```
|
89 |
+
with parameters in brackets optional.
|
90 |
+
|
91 |
+
## Evaluation
|
92 |
+
We provide evaluation scripts for TartanAir, EuRoC, TUM, and ETH3D. EuRoC and TUM can be run on a 1080Ti. TartanAir and ETH3D will require 24G of memory.
|
93 |
+
|
94 |
+
### TartanAir (Mono + Stereo)
|
95 |
+
Download the [TartanAir](https://theairlab.org/tartanair-dataset/) dataset using the script `thirdparty/tartanair_tools/download_training.py` and put them in `datasets/TartanAir`
|
96 |
+
```Bash
|
97 |
+
./tools/validate_tartanair.sh --plot_curve # monocular eval
|
98 |
+
./tools/validate_tartanair.sh --plot_curve --stereo # stereo eval
|
99 |
+
```
|
100 |
+
|
101 |
+
### EuRoC (Mono + Stereo)
|
102 |
+
Download the [EuRoC](https://projects.asl.ethz.ch/datasets/doku.php?id=kmavvisualinertialdatasets) sequences (ASL format) and put them in `datasets/EuRoC`
|
103 |
+
```Bash
|
104 |
+
./tools/evaluate_euroc.sh # monocular eval
|
105 |
+
./tools/evaluate_euroc.sh --stereo # stereo eval
|
106 |
+
```
|
107 |
+
|
108 |
+
### TUM-RGBD (Mono)
|
109 |
+
Download the fr1 sequences from [TUM-RGBD](https://vision.in.tum.de/data/datasets/rgbd-dataset/download) and put them in `datasets/TUM-RGBD`
|
110 |
+
```Bash
|
111 |
+
./tools/evaluate_tum.sh # monocular eval
|
112 |
+
```
|
113 |
+
|
114 |
+
### ETH3D (RGB-D)
|
115 |
+
Download the [ETH3D](https://www.eth3d.net/slam_datasets) dataset
|
116 |
+
```Bash
|
117 |
+
./tools/evaluate_eth3d.sh # RGB-D eval
|
118 |
+
```
|
119 |
+
|
120 |
+
## Training
|
121 |
+
|
122 |
+
First download the TartanAir dataset. The download script can be found in `thirdparty/tartanair_tools/download_training.py`. You will only need the `rgb` and `depth` data.
|
123 |
+
|
124 |
+
```
|
125 |
+
python download_training.py --rgb --depth
|
126 |
+
```
|
127 |
+
|
128 |
+
You can then run the training script. We use 4x3090 RTX GPUs for training, which takes approximately 1 week. If you use a different number of GPUs, adjust the learning rate accordingly.
|
129 |
+
|
130 |
+
**Note:** On the first training run, covisibility is computed between all pairs of frames. This can take several hours, but the results are cached so that future training runs will start immediately.
|
131 |
+
|
132 |
+
|
133 |
+
```
|
134 |
+
python train.py --datapath=<path to tartanair> --gpus=4 --lr=0.00025
|
135 |
+
```
|
136 |
+
|
137 |
+
|
138 |
+
## Acknowledgements
|
139 |
+
Data from [TartanAir](https://theairlab.org/tartanair-dataset/) was used to train our model. We additionally use evaluation tools from [evo](https://github.com/MichaelGrupp/evo) and [tartanair_tools](https://github.com/castacks/tartanair_tools).
|
thirdparty/DROID-SLAM/demo.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('droid_slam')
|
3 |
+
|
4 |
+
from tqdm import tqdm
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
import lietorch
|
8 |
+
import cv2
|
9 |
+
import os
|
10 |
+
import glob
|
11 |
+
import time
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
from torch.multiprocessing import Process
|
15 |
+
from droid import Droid
|
16 |
+
|
17 |
+
from pycocotools import mask as masktool
|
18 |
+
import torch.nn.functional as F
|
19 |
+
|
20 |
+
|
21 |
+
def show_image(image):
|
22 |
+
image = image.permute(1, 2, 0).cpu().numpy()
|
23 |
+
cv2.imshow('image', image / 255.0)
|
24 |
+
cv2.waitKey(1)
|
25 |
+
|
26 |
+
def image_stream(imagedir, calib, stride):
|
27 |
+
""" image generator """
|
28 |
+
# calib = np.loadtxt(calib, delimiter=" ")
|
29 |
+
fx, fy, cx, cy = calib[:4]
|
30 |
+
|
31 |
+
K = np.eye(3)
|
32 |
+
K[0,0] = fx
|
33 |
+
K[0,2] = cx
|
34 |
+
K[1,1] = fy
|
35 |
+
K[1,2] = cy
|
36 |
+
|
37 |
+
image_list = sorted(glob.glob(f'{imagedir}/*.jpg'))
|
38 |
+
image_list = image_list[::stride]
|
39 |
+
|
40 |
+
for t, imfile in enumerate(image_list):
|
41 |
+
image = cv2.imread(imfile)
|
42 |
+
if len(calib) > 4:
|
43 |
+
image = cv2.undistort(image, K, calib[4:])
|
44 |
+
|
45 |
+
h0, w0, _ = image.shape
|
46 |
+
h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
|
47 |
+
w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))
|
48 |
+
|
49 |
+
image = cv2.resize(image, (w1, h1))
|
50 |
+
image = image[:h1-h1%8, :w1-w1%8]
|
51 |
+
image = torch.as_tensor(image).permute(2, 0, 1)
|
52 |
+
|
53 |
+
intrinsics = torch.as_tensor([fx, fy, cx, cy])
|
54 |
+
intrinsics[0::2] *= (w1 / w0)
|
55 |
+
intrinsics[1::2] *= (h1 / h0)
|
56 |
+
|
57 |
+
yield t, image[None], intrinsics
|
58 |
+
|
59 |
+
|
60 |
+
def save_reconstruction(droid, reconstruction_path):
|
61 |
+
|
62 |
+
from pathlib import Path
|
63 |
+
import random
|
64 |
+
import string
|
65 |
+
|
66 |
+
t = droid.video.counter.value
|
67 |
+
tstamps = droid.video.tstamp[:t].cpu().numpy()
|
68 |
+
images = droid.video.images[:t].cpu().numpy()
|
69 |
+
disps = droid.video.disps_up[:t].cpu().numpy()
|
70 |
+
poses = droid.video.poses[:t].cpu().numpy()
|
71 |
+
intrinsics = droid.video.intrinsics[:t].cpu().numpy()
|
72 |
+
|
73 |
+
Path("reconstructions/{}".format(reconstruction_path)).mkdir(parents=True, exist_ok=True)
|
74 |
+
np.save("reconstructions/{}/tstamps.npy".format(reconstruction_path), tstamps)
|
75 |
+
np.save("reconstructions/{}/images.npy".format(reconstruction_path), images)
|
76 |
+
np.save("reconstructions/{}/disps.npy".format(reconstruction_path), disps)
|
77 |
+
np.save("reconstructions/{}/poses.npy".format(reconstruction_path), poses)
|
78 |
+
np.save("reconstructions/{}/intrinsics.npy".format(reconstruction_path), intrinsics)
|
79 |
+
|
80 |
+
|
81 |
+
if __name__ == '__main__':
|
82 |
+
parser = argparse.ArgumentParser()
|
83 |
+
parser.add_argument("--imagedir", type=str, help="path to image directory")
|
84 |
+
parser.add_argument("--calib", type=str, help="path to calibration file")
|
85 |
+
parser.add_argument("--t0", default=0, type=int, help="starting frame")
|
86 |
+
parser.add_argument("--stride", default=3, type=int, help="frame stride")
|
87 |
+
|
88 |
+
parser.add_argument("--weights", default="droid.pth")
|
89 |
+
parser.add_argument("--buffer", type=int, default=512)
|
90 |
+
parser.add_argument("--image_size", default=[240, 320])
|
91 |
+
parser.add_argument("--disable_vis", action="store_true")
|
92 |
+
|
93 |
+
parser.add_argument("--beta", type=float, default=0.3, help="weight for translation / rotation components of flow")
|
94 |
+
parser.add_argument("--filter_thresh", type=float, default=2.4, help="how much motion before considering new keyframe")
|
95 |
+
parser.add_argument("--warmup", type=int, default=8, help="number of warmup frames")
|
96 |
+
parser.add_argument("--keyframe_thresh", type=float, default=4.0, help="threshold to create a new keyframe")
|
97 |
+
parser.add_argument("--frontend_thresh", type=float, default=16.0, help="add edges between frames whithin this distance")
|
98 |
+
parser.add_argument("--frontend_window", type=int, default=25, help="frontend optimization window")
|
99 |
+
parser.add_argument("--frontend_radius", type=int, default=2, help="force edges between frames within radius")
|
100 |
+
parser.add_argument("--frontend_nms", type=int, default=1, help="non-maximal supression of edges")
|
101 |
+
|
102 |
+
parser.add_argument("--backend_thresh", type=float, default=22.0)
|
103 |
+
parser.add_argument("--backend_radius", type=int, default=2)
|
104 |
+
parser.add_argument("--backend_nms", type=int, default=3)
|
105 |
+
parser.add_argument("--upsample", action="store_true")
|
106 |
+
parser.add_argument("--reconstruction_path", help="path to saved reconstruction")
|
107 |
+
args = parser.parse_args()
|
108 |
+
|
109 |
+
args.stereo = False
|
110 |
+
torch.multiprocessing.set_start_method('spawn')
|
111 |
+
|
112 |
+
droid = None
|
113 |
+
|
114 |
+
# need high resolution depths
|
115 |
+
if args.reconstruction_path is not None:
|
116 |
+
args.upsample = True
|
117 |
+
|
118 |
+
tstamps = []
|
119 |
+
for (t, image, intrinsics) in tqdm(image_stream(args.imagedir, args.calib, args.stride)):
|
120 |
+
if t < args.t0:
|
121 |
+
continue
|
122 |
+
|
123 |
+
if not args.disable_vis:
|
124 |
+
show_image(image[0])
|
125 |
+
|
126 |
+
if droid is None:
|
127 |
+
args.image_size = [image.shape[2], image.shape[3]]
|
128 |
+
droid = Droid(args)
|
129 |
+
|
130 |
+
droid.track(t, image, intrinsics=intrinsics)
|
131 |
+
|
132 |
+
if args.reconstruction_path is not None:
|
133 |
+
save_reconstruction(droid, args.reconstruction_path)
|
134 |
+
|
135 |
+
traj_est = droid.terminate(image_stream(args.imagedir, args.calib, args.stride))
|
thirdparty/DROID-SLAM/droid_slam/data_readers/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
thirdparty/DROID-SLAM/droid_slam/data_readers/augmentation.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torchvision.transforms as transforms
|
3 |
+
import numpy as np
|
4 |
+
import torch.nn.functional as F
|
5 |
+
|
6 |
+
|
7 |
+
class RGBDAugmentor:
|
8 |
+
""" perform augmentation on RGB-D video """
|
9 |
+
|
10 |
+
def __init__(self, crop_size):
|
11 |
+
self.crop_size = crop_size
|
12 |
+
self.augcolor = transforms.Compose([
|
13 |
+
transforms.ToPILImage(),
|
14 |
+
transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.4/3.14),
|
15 |
+
transforms.RandomGrayscale(p=0.1),
|
16 |
+
transforms.ToTensor()])
|
17 |
+
|
18 |
+
self.max_scale = 0.25
|
19 |
+
|
20 |
+
def spatial_transform(self, images, depths, poses, intrinsics):
|
21 |
+
""" cropping and resizing """
|
22 |
+
ht, wd = images.shape[2:]
|
23 |
+
|
24 |
+
max_scale = self.max_scale
|
25 |
+
min_scale = np.log2(np.maximum(
|
26 |
+
(self.crop_size[0] + 1) / float(ht),
|
27 |
+
(self.crop_size[1] + 1) / float(wd)))
|
28 |
+
|
29 |
+
scale = 2 ** np.random.uniform(min_scale, max_scale)
|
30 |
+
intrinsics = scale * intrinsics
|
31 |
+
depths = depths.unsqueeze(dim=1)
|
32 |
+
|
33 |
+
images = F.interpolate(images, scale_factor=scale, mode='bilinear',
|
34 |
+
align_corners=False, recompute_scale_factor=True)
|
35 |
+
|
36 |
+
depths = F.interpolate(depths, scale_factor=scale, recompute_scale_factor=True)
|
37 |
+
|
38 |
+
# always perform center crop (TODO: try non-center crops)
|
39 |
+
y0 = (images.shape[2] - self.crop_size[0]) // 2
|
40 |
+
x0 = (images.shape[3] - self.crop_size[1]) // 2
|
41 |
+
|
42 |
+
intrinsics = intrinsics - torch.tensor([0.0, 0.0, x0, y0])
|
43 |
+
images = images[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
44 |
+
depths = depths[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
45 |
+
|
46 |
+
depths = depths.squeeze(dim=1)
|
47 |
+
return images, poses, depths, intrinsics
|
48 |
+
|
49 |
+
def color_transform(self, images):
|
50 |
+
""" color jittering """
|
51 |
+
num, ch, ht, wd = images.shape
|
52 |
+
images = images.permute(1, 2, 3, 0).reshape(ch, ht, wd*num)
|
53 |
+
images = 255 * self.augcolor(images[[2,1,0]] / 255.0)
|
54 |
+
return images[[2,1,0]].reshape(ch, ht, wd, num).permute(3,0,1,2).contiguous()
|
55 |
+
|
56 |
+
def __call__(self, images, poses, depths, intrinsics):
|
57 |
+
images = self.color_transform(images)
|
58 |
+
return self.spatial_transform(images, depths, poses, intrinsics)
|
thirdparty/DROID-SLAM/droid_slam/data_readers/base.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
import torch.utils.data as data
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
import csv
|
8 |
+
import os
|
9 |
+
import cv2
|
10 |
+
import math
|
11 |
+
import random
|
12 |
+
import json
|
13 |
+
import pickle
|
14 |
+
import os.path as osp
|
15 |
+
|
16 |
+
from .augmentation import RGBDAugmentor
|
17 |
+
from .rgbd_utils import *
|
18 |
+
|
19 |
+
class RGBDDataset(data.Dataset):
    """ Base class for RGBD training datasets.

    Subclasses are expected to provide `_build_dataset()` (returns the
    per-scene info dict) and `is_test_scene(scene)`; `image_read` and
    `depth_read` may be overridden to change how files are decoded.
    """

    def __init__(self, name, datapath, n_frames=4, crop_size=[384,512], fmin=8.0, fmax=75.0, do_aug=True):
        """ Base class for RGBD dataset

        Args:
            name: dataset name, also used as the cache file stem
            datapath: dataset root, stored as `self.root`
            n_frames: number of frames returned per training sample
            crop_size: forwarded to RGBDAugmentor when do_aug is set
            fmin, fmax: flow-distance bounds used when sampling neighbours
            do_aug: build an RGBDAugmentor and apply it in __getitem__
        """
        self.aug = None
        self.root = datapath
        self.name = name

        self.n_frames = n_frames
        self.fmin = fmin # exclude very easy examples
        self.fmax = fmax # exclude very hard examples

        if do_aug:
            self.aug = RGBDAugmentor(crop_size=crop_size)

        # building dataset is expensive, cache so only needs to be performed once
        cur_path = osp.dirname(osp.abspath(__file__))
        cache_dir = osp.join(cur_path, 'cache')

        # exist_ok avoids the isdir/mkdir race when several processes start at once
        os.makedirs(cache_dir, exist_ok=True)

        cache_path = osp.join(cache_dir, '{}.pickle'.format(self.name))

        if osp.isfile(cache_path):
            # NOTE: unpickling can execute arbitrary code; only load cache files you created
            with open(cache_path, 'rb') as cachefile:
                scene_info = pickle.load(cachefile)[0]
        else:
            scene_info = self._build_dataset()
            with open(cache_path, 'wb') as cachefile:
                pickle.dump((scene_info,), cachefile)

        self.scene_info = scene_info
        self._build_dataset_index()

    def _build_dataset_index(self):
        """ collect (scene, frame) pairs of non-test scenes with enough graph neighbours """
        self.dataset_index = []
        for scene in self.scene_info:
            if not self.__class__.is_test_scene(scene):
                graph = self.scene_info[scene]['graph']
                for i in graph:
                    # a frame is usable only if it has more than n_frames neighbours
                    if len(graph[i][0]) > self.n_frames:
                        self.dataset_index.append((scene, i))
            else:
                print("Reserving {} for validation".format(scene))

    @staticmethod
    def image_read(image_file):
        """ read an image file with OpenCV """
        return cv2.imread(image_file)

    @staticmethod
    def depth_read(depth_file):
        """ read a depth map stored as a numpy file """
        return np.load(depth_file)

    def build_frame_graph(self, poses, depths, intrinsics, f=16, max_flow=256):
        """ compute optical flow distance between all pairs of frames

        Returns a dict mapping frame index i to (j, d[i, j]), where j are the
        frame indices whose flow distance from i is below max_flow.
        """
        def read_disp(fn):
            # subsample the depth map by a factor f before inverting it
            depth = self.__class__.depth_read(fn)[f//2::f, f//2::f]
            depth[depth < 0.01] = np.mean(depth)
            return 1.0 / depth

        poses = np.array(poses)
        intrinsics = np.array(intrinsics) / f

        disps = np.stack(list(map(read_disp, depths)), 0)
        d = f * compute_distance_matrix_flow(poses, disps, intrinsics)

        # uncomment for nice visualization
        # import matplotlib.pyplot as plt
        # plt.imshow(d)
        # plt.show()

        graph = {}
        for i in range(d.shape[0]):
            j, = np.where(d[i] < max_flow)
            graph[i] = (j, d[i,j])

        return graph

    def __getitem__(self, index):
        """ return training video

        Returns (images, poses, disps, intrinsics) as torch tensors for
        n_frames frames sampled by walking the frame graph.
        """

        index = index % len(self.dataset_index)
        scene_id, ix = self.dataset_index[index]

        frame_graph = self.scene_info[scene_id]['graph']
        images_list = self.scene_info[scene_id]['images']
        depths_list = self.scene_info[scene_id]['depths']
        poses_list = self.scene_info[scene_id]['poses']
        intrinsics_list = self.scene_info[scene_id]['intrinsics']

        inds = [ ix ]
        while len(inds) < self.n_frames:
            # get other frames within flow threshold
            k = (frame_graph[ix][1] > self.fmin) & (frame_graph[ix][1] < self.fmax)
            frames = frame_graph[ix][0][k]

            # prefer frames forward in time
            forward = frames[frames > ix]
            if forward.size:
                ix = np.random.choice(forward)

            # test the number of candidates, not their values: counting
            # non-zero entries would wrongly treat a lone frame 0 as "no candidates"
            elif frames.size:
                ix = np.random.choice(frames)

            # when no candidate exists the current frame is repeated
            inds += [ ix ]

        images, depths, poses, intrinsics = [], [], [], []
        for i in inds:
            images.append(self.__class__.image_read(images_list[i]))
            depths.append(self.__class__.depth_read(depths_list[i]))
            poses.append(poses_list[i])
            intrinsics.append(intrinsics_list[i])

        images = np.stack(images).astype(np.float32)
        depths = np.stack(depths).astype(np.float32)
        poses = np.stack(poses).astype(np.float32)
        intrinsics = np.stack(intrinsics).astype(np.float32)

        images = torch.from_numpy(images).float()
        images = images.permute(0, 3, 1, 2)

        disps = torch.from_numpy(1.0 / depths)
        poses = torch.from_numpy(poses)
        intrinsics = torch.from_numpy(intrinsics)

        if self.aug is not None:
            images, poses, disps, intrinsics = \
                self.aug(images, poses, disps, intrinsics)

        # scale scene
        if len(disps[disps>0.01]) > 0:
            s = disps[disps>0.01].mean()
            disps = disps / s
            poses[...,:3] *= s

        return images, poses, disps, intrinsics

    def __len__(self):
        return len(self.dataset_index)

    def __imul__(self, x):
        """ repeat the dataset index x times in place """
        self.dataset_index *= x
        return self
|
thirdparty/DROID-SLAM/droid_slam/data_readers/factory.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import pickle
|
3 |
+
import os
|
4 |
+
import os.path as osp
|
5 |
+
|
6 |
+
# RGBD-Dataset
|
7 |
+
from .tartan import TartanAir
|
8 |
+
|
9 |
+
from .stream import ImageStream
|
10 |
+
from .stream import StereoStream
|
11 |
+
from .stream import RGBDStream
|
12 |
+
|
13 |
+
# streaming datasets for inference
|
14 |
+
from .tartan import TartanAirStream
|
15 |
+
from .tartan import TartanAirTestStream
|
16 |
+
|
17 |
+
def dataset_factory(dataset_list, **kwargs):
    """ create a combined dataset

    Every key in dataset_list is looked up in the registry below, built with
    **kwargs, and the results are wrapped in a single ConcatDataset.
    """

    from torch.utils.data import ConcatDataset

    registry = { 'tartan': (TartanAir, ) }

    def _build(key):
        # cache datasets for faster future loading
        dataset_cls = registry[key][0]
        db = dataset_cls(**kwargs)

        print("Dataset {} has {} images".format(key, len(db)))
        return db

    return ConcatDataset([_build(key) for key in dataset_list])
|
32 |
+
|
33 |
+
|
34 |
+
def create_datastream(dataset_path, **kwargs):
    """ create data_loader to stream images 1 by 1

    The dataset flavour is guessed from marker files / directories inside
    dataset_path. Only the TartanAir readers are imported by this module;
    the ETH3D, TUM, EuRoC and KITTI layouts are recognised but have no reader
    here, so they raise NotImplementedError (the original code referenced
    undefined class names for them and failed with NameError).
    """

    from torch.utils.data import DataLoader

    def _unsupported(label):
        raise NotImplementedError(
            "{} stream reader is not available in this package "
            "(dataset_path={!r})".format(label, dataset_path))

    if osp.isfile(osp.join(dataset_path, 'calibration.txt')):
        _unsupported('ETH3D')

    elif osp.isdir(osp.join(dataset_path, 'image_left')):
        db = TartanAirStream(dataset_path, **kwargs)

    elif osp.isfile(osp.join(dataset_path, 'rgb.txt')):
        _unsupported('TUM')

    elif osp.isdir(osp.join(dataset_path, 'mav0')):
        _unsupported('EuRoC')

    elif osp.isfile(osp.join(dataset_path, 'calib.txt')):
        _unsupported('KITTI')

    else:
        # db = TartanAirStream(dataset_path, **kwargs)
        db = TartanAirTestStream(dataset_path, **kwargs)

    stream = DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
    return stream
|
60 |
+
|
61 |
+
|
62 |
+
def create_imagestream(dataset_path, **kwargs):
    """ create data_loader to stream images 1 by 1 (monocular ImageStream) """
    from torch.utils.data import DataLoader

    # frames are served in order, one per batch
    loader_options = dict(shuffle=False, batch_size=1, num_workers=4)
    return DataLoader(ImageStream(dataset_path, **kwargs), **loader_options)
|
68 |
+
|
69 |
+
def create_stereostream(dataset_path, **kwargs):
    """ create data_loader to stream images 1 by 1 (StereoStream pairs) """
    from torch.utils.data import DataLoader

    # frames are served in order, one per batch
    loader_options = dict(shuffle=False, batch_size=1, num_workers=4)
    return DataLoader(StereoStream(dataset_path, **kwargs), **loader_options)
|
75 |
+
|
76 |
+
def create_rgbdstream(dataset_path, **kwargs):
    """ create data_loader to stream images 1 by 1 (RGBDStream) """
    from torch.utils.data import DataLoader

    # frames are served in order, one per batch
    loader_options = dict(shuffle=False, batch_size=1, num_workers=4)
    return DataLoader(RGBDStream(dataset_path, **kwargs), **loader_options)
|
82 |
+
|
thirdparty/DROID-SLAM/droid_slam/data_readers/rgbd_utils.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import os.path as osp
|
3 |
+
|
4 |
+
import torch
|
5 |
+
from lietorch import SE3
|
6 |
+
|
7 |
+
import geom.projective_ops as pops
|
8 |
+
from scipy.spatial.transform import Rotation
|
9 |
+
|
10 |
+
|
11 |
+
def parse_list(filepath, skiprows=0):
    """ read list data

    Loads a space-delimited text file as an array of strings, skipping the
    first `skiprows` lines. `str` replaces the `np.unicode_` alias, which was
    removed in NumPy 2.0 and selects the same dtype.
    """
    data = np.loadtxt(filepath, delimiter=' ', dtype=str, skiprows=skiprows)
    return data
|
15 |
+
|
16 |
+
def associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=1.0):
    """ pair images, depths, and poses

    For each image timestamp the nearest depth (and, when tstamp_pose is not
    None, the nearest pose) timestamp is looked up; the match is kept only if
    every gap is below max_dt. Returns a list of (i, j) or (i, j, k) tuples.
    """
    matches = []
    for img_idx, stamp in enumerate(tstamp_image):
        depth_idx = np.argmin(np.abs(tstamp_depth - stamp))
        depth_ok = np.abs(tstamp_depth[depth_idx] - stamp) < max_dt

        if tstamp_pose is None:
            if depth_ok:
                matches.append((img_idx, depth_idx))
            continue

        pose_idx = np.argmin(np.abs(tstamp_pose - stamp))
        pose_ok = np.abs(tstamp_pose[pose_idx] - stamp) < max_dt
        if depth_ok and pose_ok:
            matches.append((img_idx, depth_idx, pose_idx))

    return matches
|
34 |
+
|
35 |
+
def loadtum(datapath, frame_rate=-1):
    """ read video data in tum-rgbd format

    Returns (images, depths, poses, intrinsics, tstamps) for every 5th
    associated frame. When neither 'groundtruth.txt' nor 'pose.txt' exists a
    4-tuple of None is returned instead (note the different arity).
    `frame_rate` is currently unused.
    """
    pose_list = None
    for pose_name in ('groundtruth.txt', 'pose.txt'):
        candidate = osp.join(datapath, pose_name)
        if osp.isfile(candidate):
            pose_list = candidate
            break

    if pose_list is None:
        return None, None, None, None

    image_list = osp.join(datapath, 'rgb.txt')
    depth_list = osp.join(datapath, 'depth.txt')

    # optional calibration file; intrinsics stay empty without it
    intrinsic = None
    calib_path = osp.join(datapath, 'calibration.txt')
    if osp.isfile(calib_path):
        intrinsic = np.loadtxt(calib_path, delimiter=' ').astype(np.float64)

    image_data = parse_list(image_list)
    depth_data = parse_list(depth_list)
    pose_data = parse_list(pose_list, skiprows=1)
    pose_vecs = pose_data[:,1:].astype(np.float64)

    # first column of every list holds the timestamp
    tstamp_image = image_data[:,0].astype(np.float64)
    tstamp_depth = depth_data[:,0].astype(np.float64)
    tstamp_pose = pose_data[:,0].astype(np.float64)
    associations = associate_frames(tstamp_image, tstamp_depth, tstamp_pose)

    # keep every 5th association
    selected = range(len(associations))[::5]

    images, poses, depths, intrinsics, tstamps = [], [], [], [], []
    for ix in selected:
        i, j, k = associations[ix]
        images.append(osp.join(datapath, image_data[i,1]))
        depths.append(osp.join(datapath, depth_data[j,1]))
        poses.append(pose_vecs[k])
        tstamps.append(tstamp_image[i])

        if intrinsic is not None:
            intrinsics.append(intrinsic)

    return images, depths, poses, intrinsics, tstamps
|
89 |
+
|
90 |
+
|
91 |
+
def all_pairs_distance_matrix(poses, beta=2.5):
    """ compute distance matrix between all pairs of poses

    The first three pose components are multiplied by beta before the
    pairwise relative transforms are formed; the distance is the norm of the
    log map of each relative transform.
    """
    pose_vecs = np.array(poses, dtype=np.float32)
    pose_vecs[:,:3] = pose_vecs[:,:3] * beta # scale to balance rot + trans
    group = SE3(torch.from_numpy(pose_vecs))

    relative = group[:,None].inv() * group[None,:]
    distances = relative.log().norm(dim=-1)
    return distances.cpu().numpy()
|
99 |
+
|
100 |
+
def pose_matrix_to_quaternion(pose):
    """ convert 4x4 pose matrix to (t, q)

    Returns a length-7 vector: the translation column followed by the
    quaternion produced by scipy's Rotation.as_quat().
    """
    translation = pose[:3, 3]
    rotation = Rotation.from_matrix(pose[:3, :3])
    return np.concatenate((translation, rotation.as_quat()), axis=0)
|
104 |
+
|
105 |
+
def compute_distance_matrix_flow(poses, disps, intrinsics):
    """ compute flow magnitude between all pairs of frames

    Returns an (N, N) float32 numpy matrix of validity-weighted mean flow
    magnitudes; pairs whose mean validity is below 0.7 are set to inf.
    All tensors are moved to CUDA.
    """
    if not isinstance(poses, SE3):
        poses = SE3(torch.from_numpy(poses).float().cuda()[None]).inv()

    disps = torch.from_numpy(disps).float().cuda()[None]
    intrinsics = torch.from_numpy(intrinsics).float().cuda()[None]

    N = poses.shape[1]

    # flattened list of all ordered (i, j) frame pairs
    frame_ids = torch.arange(N)
    ii, jj = torch.meshgrid(frame_ids, torch.arange(N), indexing='ij')
    ii = ii.reshape(-1).cuda()
    jj = jj.reshape(-1).cuda()

    MAX_FLOW = 100.0
    matrix = np.zeros((N, N), dtype=np.float32)

    # process the pairs in chunks of `chunk` edges
    chunk = 2048
    for start in range(0, ii.shape[0], chunk):
        src = ii[start:start+chunk]
        dst = jj[start:start+chunk]

        flow_fwd, val_fwd = pops.induced_flow(poses, disps, intrinsics, src, dst)
        flow_bwd, val_bwd = pops.induced_flow(poses, disps, intrinsics, dst, src)

        flow = torch.stack([flow_fwd, flow_bwd], dim=2)
        val = torch.stack([val_fwd, val_bwd], dim=2)

        mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
        mag = mag.view(mag.shape[1], -1)
        val = val.view(val.shape[1], -1)

        # validity-weighted average; low-overlap pairs are marked unreachable
        val_mean = val.mean(-1)
        mag = (mag * val).mean(-1) / val_mean
        mag[val_mean < 0.7] = np.inf

        matrix[src.cpu().numpy(), dst.cpu().numpy()] = mag.cpu().numpy()

    return matrix
|
143 |
+
|
144 |
+
|
145 |
+
def compute_distance_matrix_flow2(poses, disps, intrinsics, beta=0.4):
    """ compute flow magnitude between all pairs of frames

    Variant that combines, per direction, the `tonly=True` flow with beta
    times the full flow. Unlike compute_distance_matrix_flow, the inputs are
    used as given (no numpy -> CUDA / SE3 conversion is performed here), so
    callers presumably pass tensors already -- TODO confirm against callers.

    Returns an (N, N) float32 numpy matrix; pairs whose mean validity is
    below 0.8 are set to inf.
    """
    N = poses.shape[1]

    ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')
    ii = ii.reshape(-1)
    jj = jj.reshape(-1)

    MAX_FLOW = 128.0
    matrix = np.zeros((N, N), dtype=np.float32)

    s = 2048
    for i in range(0, ii.shape[0], s):
        src = ii[i:i+s]
        dst = jj[i:i+s]

        flow1a, val1a = pops.induced_flow(poses, disps, intrinsics, src, dst, tonly=True)
        flow1b, val1b = pops.induced_flow(poses, disps, intrinsics, src, dst)
        flow2a, val2a = pops.induced_flow(poses, disps, intrinsics, dst, src, tonly=True)
        # reverse direction must use (dst, src) for the full flow as well;
        # the previous code reused the forward (src, dst) pair here
        flow2b, val2b = pops.induced_flow(poses, disps, intrinsics, dst, src)

        flow1 = flow1a + beta * flow1b
        val1 = val1a * val1b

        flow2 = flow2a + beta * flow2b
        val2 = val2a * val2b

        flow = torch.stack([flow1, flow2], dim=2)
        val = torch.stack([val1, val2], dim=2)

        mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
        mag = mag.view(mag.shape[1], -1)
        val = val.view(val.shape[1], -1)

        mag = (mag * val).mean(-1) / val.mean(-1)
        mag[val.mean(-1) < 0.8] = np.inf

        i1 = src.cpu().numpy()
        j1 = dst.cpu().numpy()
        matrix[i1, j1] = mag.cpu().numpy()

    return matrix
|
thirdparty/DROID-SLAM/droid_slam/data_readers/stream.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
import torch.utils.data as data
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
import csv
|
8 |
+
import os
|
9 |
+
import cv2
|
10 |
+
import math
|
11 |
+
import random
|
12 |
+
import json
|
13 |
+
import pickle
|
14 |
+
import os.path as osp
|
15 |
+
|
16 |
+
from .rgbd_utils import *
|
17 |
+
|
18 |
+
class RGBDStream(data.Dataset):
    """ Base class for streaming one sequence frame by frame.

    Subclasses implement `_build_dataset_index()`, which must set
    `self.images`, `self.poses` and `self.intrinsics` (and may set
    `self.tstamps`).
    """

    def __init__(self, datapath, frame_rate=-1, image_size=[384,512], crop_size=[0,0]):
        self.datapath = datapath
        self.frame_rate = frame_rate
        self.image_size = image_size
        self.crop_size = crop_size
        self._build_dataset_index()

    def _build_dataset_index(self):
        """ populate images / poses / intrinsics; must be provided by subclasses """
        raise NotImplementedError(
            "{} must implement _build_dataset_index()".format(self.__class__.__name__))

    @staticmethod
    def image_read(image_file):
        return cv2.imread(image_file)

    @staticmethod
    def depth_read(depth_file):
        return np.load(depth_file)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        """ return (tstamp, image, pose, intrinsic) for one frame

        The image is resized to image_size and then cropped symmetrically by
        crop_size; the intrinsics (fx, fy, cx, cy) are adjusted to match.
        """
        image = self.__class__.image_read(self.images[index])
        image = torch.from_numpy(image).float()
        image = image.permute(2, 0, 1)

        # fall back to the frame index when no usable timestamp list exists;
        # only lookup failures are handled so unrelated errors still surface
        try:
            tstamp = self.tstamps[index]
        except (AttributeError, IndexError, KeyError, TypeError):
            tstamp = index

        pose = torch.from_numpy(self.poses[index]).float()
        intrinsic = torch.from_numpy(self.intrinsics[index]).float()

        # resize image
        sx = self.image_size[1] / image.shape[2]
        sy = self.image_size[0] / image.shape[1]

        image = F.interpolate(image[None], self.image_size, mode='bilinear', align_corners=False)[0]

        fx, fy, cx, cy = intrinsic.unbind(dim=0)
        fx, cx = sx * fx, sx * cx
        fy, cy = sy * fy, sy * cy

        # crop image
        if self.crop_size[0] > 0:
            cy = cy - self.crop_size[0]
            image = image[:,self.crop_size[0]:-self.crop_size[0],:]

        if self.crop_size[1] > 0:
            cx = cx - self.crop_size[1]
            image = image[:,:,self.crop_size[1]:-self.crop_size[1]]

        intrinsic = torch.stack([fx, fy, cx, cy])

        return tstamp, image, pose, intrinsic
|
73 |
+
|
74 |
+
|
75 |
+
class ImageStream(data.Dataset):
    """ Stream monocular images from a directory.

    If `datapath/rgb.txt` exists it is read as a space-delimited list of
    "timestamp relative_path" rows; otherwise all *.jpg then all *.png files
    directly inside datapath are used and the frame index serves as timestamp.
    """

    def __init__(self, datapath, intrinsics, rate=1, image_size=[384,512]):
        rgb_list = osp.join(datapath, 'rgb.txt')
        if os.path.isfile(rgb_list):
            # str / np.float64 replace the removed np.unicode_ / np.float aliases;
            # ndmin=2 keeps the column indexing valid for a single-row file
            rgb_list = np.loadtxt(rgb_list, delimiter=' ', dtype=str, ndmin=2)
            self.timestamps = rgb_list[:,0].astype(np.float64)
            self.images = [os.path.join(datapath, x) for x in rgb_list[:,1]]
            self.images = self.images[::rate]
            self.timestamps = self.timestamps[::rate]

        else:
            import glob
            self.images = sorted(glob.glob(osp.join(datapath, '*.jpg'))) + sorted(glob.glob(osp.join(datapath, '*.png')))
            self.images = self.images[::rate]

        self.intrinsics = intrinsics
        self.image_size = image_size

    def __len__(self):
        return len(self.images)

    @staticmethod
    def image_read(imfile):
        return cv2.imread(imfile)

    def __getitem__(self, index):
        """ return (tstamp, image, intrinsics) with the image resized to image_size """
        image = self.__class__.image_read(self.images[index])

        # no timestamp list exists when the images came from a directory glob
        try:
            tstamp = self.timestamps[index]
        except (AttributeError, IndexError, TypeError):
            tstamp = index

        ht0, wd0 = image.shape[:2]
        ht1, wd1 = self.image_size

        # clone: as_tensor shares memory with tensor / ndarray inputs, so the
        # in-place rescale below would otherwise compound on every call
        intrinsics = torch.as_tensor(self.intrinsics).clone()
        if not intrinsics.is_floating_point():
            # integer intrinsics cannot be scaled in place by a float ratio
            intrinsics = intrinsics.float()
        intrinsics[0] *= wd1 / wd0
        intrinsics[1] *= ht1 / ht0
        intrinsics[2] *= wd1 / wd0
        intrinsics[3] *= ht1 / ht0

        # resize image
        ikwargs = {'mode': 'bilinear', 'align_corners': True}
        image = torch.from_numpy(image).float().permute(2, 0, 1)
        image = F.interpolate(image[None], self.image_size, **ikwargs)[0]

        return tstamp, image, intrinsics
|
124 |
+
|
125 |
+
|
126 |
+
|
127 |
+
class StereoStream(data.Dataset):
    """ Stream stereo image pairs.

    Left images are the *.png files in `datapath/left_root`; a frame is kept
    only if a file with the same name exists in `datapath/right_root`. The
    timestamp is the file name (without extension) parsed as a float.
    """

    def __init__(self, datapath, intrinsics, rate=1, image_size=[384,512],
            map_left=None, map_right=None, left_root='image_left', right_root='image_right'):
        import glob
        self.intrinsics = intrinsics
        self.image_size = image_size

        imgs = sorted(glob.glob(osp.join(datapath, left_root, '*.png')))[::rate]
        self.images_l = []
        self.images_r = []
        self.tstamps = []

        for img_l in imgs:
            fname = osp.basename(img_l)
            # build the right path explicitly: str.replace would also rewrite
            # any other occurrence of left_root inside the path
            img_r = osp.join(datapath, right_root, fname)
            if os.path.isfile(img_r):
                # builtin float replaces the removed np.float alias
                t = float(osp.splitext(fname)[0])
                self.tstamps.append(t)
                self.images_l += [ img_l ]
                self.images_r += [ img_r ]

        self.map_left = map_left
        self.map_right = map_right

    def __len__(self):
        return len(self.images_l)

    @staticmethod
    def image_read(imfile, imap=None):
        """ read an image and, if imap is given, remap it with cv2.remap """
        image = cv2.imread(imfile)
        if imap is not None:
            image = cv2.remap(image, imap[0], imap[1], interpolation=cv2.INTER_LINEAR)
        return image

    def __getitem__(self, index):
        """ return (tstamp, image_l, image_r, intrinsics), images resized to image_size """
        tstamp = self.tstamps[index]
        image_l = self.__class__.image_read(self.images_l[index], self.map_left)
        image_r = self.__class__.image_read(self.images_r[index], self.map_right)

        ht0, wd0 = image_l.shape[:2]
        ht1, wd1 = self.image_size

        # clone: as_tensor shares memory with tensor / ndarray inputs, so the
        # in-place rescale below would otherwise compound on every call
        intrinsics = torch.as_tensor(self.intrinsics).clone()
        if not intrinsics.is_floating_point():
            # integer intrinsics cannot be scaled in place by a float ratio
            intrinsics = intrinsics.float()
        intrinsics[0] *= wd1 / wd0
        intrinsics[1] *= ht1 / ht0
        intrinsics[2] *= wd1 / wd0
        intrinsics[3] *= ht1 / ht0

        image_l = torch.from_numpy(image_l).float().permute(2, 0, 1)
        image_r = torch.from_numpy(image_r).float().permute(2, 0, 1)

        # resize image
        ikwargs = {'mode': 'bilinear', 'align_corners': True}
        image_l = F.interpolate(image_l[None], self.image_size, **ikwargs)[0]
        image_r = F.interpolate(image_r[None], self.image_size, **ikwargs)[0]

        return tstamp, image_l, image_r, intrinsics
|
184 |
+
|
185 |
+
|
186 |
+
|
187 |
+
# class RGBDStream(data.Dataset):
|
188 |
+
# def __init__(self, datapath, intrinsics=None, rate=1, image_size=[384,512]):
|
189 |
+
# assoc_file = osp.join(datapath, 'associated.txt')
|
190 |
+
# assoc_list = np.loadtxt(assoc_file, delimiter=' ', dtype=np.unicode_)
|
191 |
+
|
192 |
+
# self.intrinsics = intrinsics
|
193 |
+
# self.image_size = image_size
|
194 |
+
|
195 |
+
# self.timestamps = assoc_list[:,0].astype(np.float)[::rate]
|
196 |
+
# self.images = [os.path.join(datapath, x) for x in assoc_list[:,1]][::rate]
|
197 |
+
# self.depths = [os.path.join(datapath, x) for x in assoc_list[:,3]][::rate]
|
198 |
+
|
199 |
+
# def __len__(self):
|
200 |
+
# return len(self.images)
|
201 |
+
|
202 |
+
# @staticmethod
|
203 |
+
# def image_read(imfile):
|
204 |
+
# return cv2.imread(imfile)
|
205 |
+
|
206 |
+
# @staticmethod
|
207 |
+
# def depth_read(depth_file):
|
208 |
+
# depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH)
|
209 |
+
# return depth.astype(np.float32) / 5000.0
|
210 |
+
|
211 |
+
# def __getitem__(self, index):
|
212 |
+
# """ return training video """
|
213 |
+
# tstamp = self.timestamps[index]
|
214 |
+
# image = self.__class__.image_read(self.images[index])
|
215 |
+
# depth = self.__class__.depth_read(self.depths[index])
|
216 |
+
|
217 |
+
# ht0, wd0 = image.shape[:2]
|
218 |
+
# ht1, wd1 = self.image_size
|
219 |
+
|
220 |
+
# intrinsics = torch.as_tensor(self.intrinsics)
|
221 |
+
# intrinsics[0] *= wd1 / wd0
|
222 |
+
# intrinsics[1] *= ht1 / ht0
|
223 |
+
# intrinsics[2] *= wd1 / wd0
|
224 |
+
# intrinsics[3] *= ht1 / ht0
|
225 |
+
|
226 |
+
# # resize image
|
227 |
+
# ikwargs = {'mode': 'bilinear', 'align_corners': True}
|
228 |
+
# image = torch.from_numpy(image).float().permute(2, 0, 1)
|
229 |
+
# image = F.interpolate(image[None], self.image_size, **ikwargs)[0]
|
230 |
+
|
231 |
+
# depth = torch.from_numpy(depth).float()[None,None]
|
232 |
+
# depth = F.interpolate(depth, self.image_size, mode='nearest').squeeze()
|
233 |
+
|
234 |
+
# return tstamp, image, depth, intrinsics
|
thirdparty/DROID-SLAM/droid_slam/data_readers/tartan.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
import glob
|
5 |
+
import cv2
|
6 |
+
import os
|
7 |
+
import os.path as osp
|
8 |
+
|
9 |
+
from lietorch import SE3
|
10 |
+
from .base import RGBDDataset
|
11 |
+
from .stream import RGBDStream
|
12 |
+
|
13 |
+
cur_path = osp.dirname(osp.abspath(__file__))

# held-out scenes: whitespace-separated entries of tartan_test.txt next to this module
with open(osp.join(cur_path, 'tartan_test.txt')) as split_file:
    test_split = split_file.read().split()
|
16 |
+
|
17 |
+
|
18 |
+
class TartanAir(RGBDDataset):
    """ TartanAir training dataset built on RGBDDataset """

    # scale depths to balance rot & trans
    DEPTH_SCALE = 5.0

    def __init__(self, mode='training', **kwargs):
        self.mode = mode
        # placeholder only: RGBDDataset.__init__ assigns n_frames again from its own argument
        self.n_frames = 2
        super(TartanAir, self).__init__(name='TartanAir', **kwargs)

    @staticmethod
    def is_test_scene(scene):
        """ True if any entry of the module-level test_split occurs in the scene path """
        # print(scene, any(x in scene for x in test_split))
        return any(x in scene for x in test_split)

    def _build_dataset(self):
        """ scan self.root for scenes and build the per-scene info dict

        Each scene entry holds the sorted image / depth file lists, the poses
        (columns reordered, translation divided by DEPTH_SCALE), per-frame
        intrinsics and the frame graph from build_frame_graph.
        """
        from tqdm import tqdm
        print("Building TartanAir dataset")

        scene_info = {}
        scenes = glob.glob(osp.join(self.root, '*/*/*/*'))
        for scene in tqdm(sorted(scenes)):
            images = sorted(glob.glob(osp.join(scene, 'image_left/*.png')))
            depths = sorted(glob.glob(osp.join(scene, 'depth_left/*.npy')))

            poses = np.loadtxt(osp.join(scene, 'pose_left.txt'), delimiter=' ')
            poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
            poses[:,:3] /= TartanAir.DEPTH_SCALE
            intrinsics = [TartanAir.calib_read()] * len(images)

            # graph of co-visible frames based on flow
            graph = self.build_frame_graph(poses, depths, intrinsics)

            scene_info[scene] = {'images': images, 'depths': depths,
                'poses': poses, 'intrinsics': intrinsics, 'graph': graph}

        return scene_info

    @staticmethod
    def calib_read():
        """ fixed intrinsics vector used for every frame """
        return np.array([320.0, 320.0, 320.0, 240.0])

    @staticmethod
    def image_read(image_file):
        return cv2.imread(image_file)

    @staticmethod
    def depth_read(depth_file):
        """ load a depth map, divide by DEPTH_SCALE and replace NaN / +inf with 1.0 """
        depth = np.load(depth_file) / TartanAir.DEPTH_SCALE
        # NaN never compares equal to itself, so `depth == np.nan` matched nothing
        depth[np.isnan(depth)] = 1.0
        depth[depth==np.inf] = 1.0
        return depth
|
71 |
+
|
72 |
+
|
73 |
+
class TartanAirStream(RGBDStream):
    """ Stream one TartanAir scene located under 'datasets/TartanAir' """

    def __init__(self, datapath, **kwargs):
        super(TartanAirStream, self).__init__(datapath=datapath, **kwargs)

    def _build_dataset_index(self):
        """ build list of images, poses, depths, and intrinsics """
        self.root = 'datasets/TartanAir'
        scene_dir = osp.join(self.root, self.datapath)

        frame_files = sorted(glob.glob(osp.join(scene_dir, 'image_left/*.png')))

        raw_poses = np.loadtxt(osp.join(scene_dir, 'pose_left.txt'), delimiter=' ')
        raw_poses = raw_poses[:, [1, 2, 0, 4, 5, 3, 6]]

        # express every pose relative to the first frame
        trajectory = SE3(torch.as_tensor(raw_poses))
        trajectory = trajectory[[0]].inv() * trajectory
        rel_poses = trajectory.data.cpu().numpy()

        calib = self.calib_read(self.datapath)
        calib_per_frame = np.tile(calib[None], (len(frame_files), 1))

        # NOTE: the RGBDStream default frame_rate=-1 gives a stride of -1,
        # i.e. the sequence is reversed unless a positive value is passed
        stride = int(self.frame_rate)
        self.images = frame_files[::stride]
        self.poses = rel_poses[::stride]
        self.intrinsics = calib_per_frame[::stride]

    @staticmethod
    def calib_read(datapath):
        """ fixed intrinsics vector; datapath is ignored """
        return np.array([320.0, 320.0, 320.0, 240.0])

    @staticmethod
    def image_read(image_file):
        return cv2.imread(image_file)
|
106 |
+
|
107 |
+
|
108 |
+
class TartanAirTestStream(RGBDStream):
    """ Stream one monocular test sequence located under 'datasets/mono' """

    def __init__(self, datapath, **kwargs):
        super(TartanAirTestStream, self).__init__(datapath=datapath, **kwargs)

    def _build_dataset_index(self):
        """ build list of images, poses, depths, and intrinsics """
        self.root = 'datasets/mono'

        frame_files = sorted(glob.glob(osp.join(self.root, self.datapath, '*.png')))

        # ground-truth trajectory lives in mono_gt/<datapath>.txt
        gt_file = osp.join(self.root, 'mono_gt', self.datapath + '.txt')
        raw_poses = np.loadtxt(gt_file, delimiter=' ')
        raw_poses = raw_poses[:, [1, 2, 0, 4, 5, 3, 6]]

        # express every pose relative to the first frame
        trajectory = SE3(torch.as_tensor(raw_poses))
        trajectory = trajectory[[0]].inv() * trajectory
        rel_poses = trajectory.data.cpu().numpy()

        calib = self.calib_read(self.datapath)
        calib_per_frame = np.tile(calib[None], (len(frame_files), 1))

        # NOTE: the RGBDStream default frame_rate=-1 gives a stride of -1,
        # i.e. the sequence is reversed unless a positive value is passed
        stride = int(self.frame_rate)
        self.images = frame_files[::stride]
        self.poses = rel_poses[::stride]
        self.intrinsics = calib_per_frame[::stride]

    @staticmethod
    def calib_read(datapath):
        """ fixed intrinsics vector; datapath is ignored """
        return np.array([320.0, 320.0, 320.0, 240.0])

    @staticmethod
    def image_read(image_file):
        return cv2.imread(image_file)
|
thirdparty/DROID-SLAM/droid_slam/data_readers/tartan_test.txt
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
abandonedfactory/abandonedfactory/Easy/P011
|
2 |
+
abandonedfactory/abandonedfactory/Hard/P011
|
3 |
+
abandonedfactory_night/abandonedfactory_night/Easy/P013
|
4 |
+
abandonedfactory_night/abandonedfactory_night/Hard/P014
|
5 |
+
amusement/amusement/Easy/P008
|
6 |
+
amusement/amusement/Hard/P007
|
7 |
+
carwelding/carwelding/Easy/P007
|
8 |
+
endofworld/endofworld/Easy/P009
|
9 |
+
gascola/gascola/Easy/P008
|
10 |
+
gascola/gascola/Hard/P009
|
11 |
+
hospital/hospital/Easy/P036
|
12 |
+
hospital/hospital/Hard/P049
|
13 |
+
japanesealley/japanesealley/Easy/P007
|
14 |
+
japanesealley/japanesealley/Hard/P005
|
15 |
+
neighborhood/neighborhood/Easy/P021
|
16 |
+
neighborhood/neighborhood/Hard/P017
|
17 |
+
ocean/ocean/Easy/P013
|
18 |
+
ocean/ocean/Hard/P009
|
19 |
+
office2/office2/Easy/P011
|
20 |
+
office2/office2/Hard/P010
|
21 |
+
office/office/Hard/P007
|
22 |
+
oldtown/oldtown/Easy/P007
|
23 |
+
oldtown/oldtown/Hard/P008
|
24 |
+
seasidetown/seasidetown/Easy/P009
|
25 |
+
seasonsforest/seasonsforest/Easy/P011
|
26 |
+
seasonsforest/seasonsforest/Hard/P006
|
27 |
+
seasonsforest_winter/seasonsforest_winter/Easy/P009
|
28 |
+
seasonsforest_winter/seasonsforest_winter/Hard/P018
|
29 |
+
soulcity/soulcity/Easy/P012
|
30 |
+
soulcity/soulcity/Hard/P009
|
31 |
+
westerndesert/westerndesert/Easy/P013
|
32 |
+
westerndesert/westerndesert/Hard/P007
|
thirdparty/DROID-SLAM/droid_slam/depth_video.py
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import lietorch
|
4 |
+
import droid_backends
|
5 |
+
|
6 |
+
from torch.multiprocessing import Process, Queue, Lock, Value
|
7 |
+
from collections import OrderedDict
|
8 |
+
|
9 |
+
from droid_net import cvx_upsample
|
10 |
+
import geom.projective_ops as pops
|
11 |
+
|
12 |
+
class DepthVideo:
    """Preallocated, index-addressed store of per-keyframe state.

    Every quantity (timestamps, images, poses, disparities, intrinsics and
    network features) lives in a tensor whose first axis has `buffer` slots.
    `counter` holds the number of slots in use, and its lock serialises the
    accessors defined below.
    """

    def __init__(self, image_size=(480, 640), buffer=1024, stereo=False, device="cuda:0"):
        """Allocate all per-frame buffers.

        image_size: indexable pair; element 0 is used as height, element 1 as width.
        buffer: number of frame slots in every tensor.
        stereo: when true, the feature-map buffer keeps two maps per frame instead of one.
        device: kept for interface compatibility.
            NOTE(review): it is never read -- every tensor below is created on "cuda".
        """

        # current keyframe count
        self.counter = Value('i', 0)
        self.ready = Value('i', 0)
        self.ht = ht = image_size[0]
        self.wd = wd = image_size[1]

        ### state attributes ###
        self.tstamp = torch.zeros(buffer, device="cuda", dtype=torch.float).share_memory_()
        self.images = torch.zeros(buffer, 3, ht, wd, device="cuda", dtype=torch.uint8)
        self.dirty = torch.zeros(buffer, device="cuda", dtype=torch.bool).share_memory_()
        self.red = torch.zeros(buffer, device="cuda", dtype=torch.bool).share_memory_()
        self.poses = torch.zeros(buffer, 7, device="cuda", dtype=torch.float).share_memory_()
        # disparities are kept at 1/8 of the image resolution and start at one
        self.disps = torch.ones(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
        self.disps_sens = torch.zeros(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
        self.disps_up = torch.zeros(buffer, ht, wd, device="cuda", dtype=torch.float).share_memory_()
        self.intrinsics = torch.zeros(buffer, 4, device="cuda", dtype=torch.float).share_memory_()

        self.masks = torch.zeros(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
        self.stereo = stereo
        c = 1 if not self.stereo else 2

        ### feature attributes ###
        self.fmaps = torch.zeros(buffer, c, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
        self.nets = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
        self.inps = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()

        # initialize poses to identity transformation
        self.poses[:] = torch.as_tensor([0, 0, 0, 0, 0, 0, 1], dtype=torch.float, device="cuda")

    def get_lock(self):
        """Return the lock that guards `counter` (and, by convention, the buffers)."""
        return self.counter.get_lock()

    def __item_setter(self, index, item):
        """Write `item` into slot(s) `index`; the caller must already hold the lock.

        `item` is a sequence laid out as
        (tstamp, image, pose, disp, depth, intrinsics[, fmap[, net[, inp[, mask]]]]).
        Entries 2-5 are skipped when None; entries 6-9 are optional trailing fields.
        """
        # Grow the in-use count whenever the write reaches slot `counter` or beyond.
        # The tensor branch uses >= like the int branch, so a write landing exactly
        # on slot `counter` is counted as well.
        if isinstance(index, int) and index >= self.counter.value:
            self.counter.value = index + 1

        elif isinstance(index, torch.Tensor) and index.max().item() >= self.counter.value:
            self.counter.value = index.max().item() + 1

        # self.dirty[index] = True
        self.tstamp[index] = item[0]
        self.images[index] = item[1]

        if item[2] is not None:
            self.poses[index] = item[2]

        if item[3] is not None:
            self.disps[index] = item[3]

        if item[4] is not None:
            # subsample the depth map by 8 (offset 3) and store it as inverse depth;
            # non-positive entries are kept as they are
            depth = item[4][3::8,3::8]
            self.disps_sens[index] = torch.where(depth>0, 1.0/depth, depth)

        if item[5] is not None:
            self.intrinsics[index] = item[5]

        if len(item) > 6:
            self.fmaps[index] = item[6]

        if len(item) > 7:
            self.nets[index] = item[7]

        if len(item) > 8:
            self.inps[index] = item[8]

        if len(item) > 9:
            self.masks[index] = item[9]

    def __setitem__(self, index, item):
        """Locked write of `item` into slot(s) `index`."""
        with self.get_lock():
            self.__item_setter(index, item)

    def __getitem__(self, index):
        """Return (pose, disp, intrinsics, fmap, net, inp) for `index`.

        A negative int is resolved relative to the current keyframe count.
        """

        with self.get_lock():
            # support negative indexing
            if isinstance(index, int) and index < 0:
                index = self.counter.value + index

            item = (
                self.poses[index],
                self.disps[index],
                self.intrinsics[index],
                self.fmaps[index],
                self.nets[index],
                self.inps[index])

        return item

    def append(self, *item):
        """Locked write of `item` into the first unused slot."""
        with self.get_lock():
            self.__item_setter(self.counter.value, item)


    ### geometric operations ###

    @staticmethod
    def format_indicies(ii, jj):
        """Coerce both index collections to flat int64 tensors on "cuda"."""

        if not isinstance(ii, torch.Tensor):
            ii = torch.as_tensor(ii)

        if not isinstance(jj, torch.Tensor):
            jj = torch.as_tensor(jj)

        ii = ii.to(device="cuda", dtype=torch.long).reshape(-1)
        jj = jj.to(device="cuda", dtype=torch.long).reshape(-1)

        return ii, jj

    def upsample(self, ix, mask):
        """Upsample the disparities of frames `ix` with `mask` and store them in `disps_up`."""

        disps_up = cvx_upsample(self.disps[ix].unsqueeze(-1), mask)
        # Drop only the trailing channel axis added above; a bare squeeze() would
        # also collapse the frame axis when `ix` selects a single frame.
        self.disps_up[ix] = disps_up.squeeze(-1)

    def normalize(self):
        """Rescale the in-use frames so their mean disparity becomes one.

        Disparities are divided by their mean `s`, the first three pose
        components (presumably the translation -- confirm) are multiplied by
        `s`, and the touched frames are flagged dirty.
        """

        with self.get_lock():
            s = self.disps[:self.counter.value].mean()
            self.disps[:self.counter.value] /= s
            self.poses[:self.counter.value,:3] *= s
            self.dirty[:self.counter.value] = True


    def reproject(self, ii, jj):
        """Project points from frames `ii` into frames `jj`; returns (coords, valid_mask)."""
        ii, jj = DepthVideo.format_indicies(ii, jj)
        Gs = lietorch.SE3(self.poses[None])

        coords, valid_mask = \
            pops.projective_transform(Gs, self.disps[None], self.intrinsics[None], ii, jj)

        return coords, valid_mask

    def distance(self, ii=None, jj=None, beta=0.3, bidirectional=True):
        """Frame distance metric between index pairs.

        With `ii` omitted, every pair among the in-use frames is evaluated and
        the result is returned as an N x N matrix; otherwise one value per
        (ii, jj) pair is returned. With `bidirectional`, the ii->jj and jj->ii
        distances are averaged.
        """

        return_matrix = False
        if ii is None:
            return_matrix = True
            N = self.counter.value
            ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')

        ii, jj = DepthVideo.format_indicies(ii, jj)

        if bidirectional:

            poses = self.poses[:self.counter.value].clone()

            d1 = droid_backends.frame_distance(
                poses, self.disps, self.intrinsics[0], ii, jj, beta)

            d2 = droid_backends.frame_distance(
                poses, self.disps, self.intrinsics[0], jj, ii, beta)

            d = .5 * (d1 + d2)

        else:
            d = droid_backends.frame_distance(
                self.poses, self.disps, self.intrinsics[0], ii, jj, beta)

        if return_matrix:
            return d.reshape(N, N)

        return d

    def ba(self, target, weight, eta, ii, jj, t0=1, t1=None, itrs=2, lm=1e-4, ep=0.1, motion_only=False):
        """Dense bundle adjustment (DBA) over the frame window [t0, t1).

        When `t1` is None it defaults to one past the largest index in `ii`/`jj`.
        Poses and disparities are updated in place by the compiled backend, and
        disparities are clamped to at least 0.001 afterwards.
        """

        with self.get_lock():

            # [t0, t1] window of bundle adjustment optimization
            if t1 is None:
                t1 = max(ii.max().item(), jj.max().item()) + 1

            droid_backends.ba(self.poses, self.disps, self.intrinsics[0], self.disps_sens,
                target, weight, eta, ii, jj, t0, t1, itrs, lm, ep, motion_only)

            self.disps.clamp_(min=0.001)
|
thirdparty/DROID-SLAM/droid_slam/droid.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import lietorch
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from droid_net import DroidNet
|
6 |
+
from depth_video import DepthVideo
|
7 |
+
from motion_filter import MotionFilter
|
8 |
+
from droid_frontend import DroidFrontend
|
9 |
+
from droid_backend import DroidBackend
|
10 |
+
from trajectory_filler import PoseTrajectoryFiller
|
11 |
+
|
12 |
+
from collections import OrderedDict
|
13 |
+
from torch.multiprocessing import Process
|
14 |
+
|
15 |
+
|
16 |
+
class Droid:
    """Top-level driver: loads the network and wires the motion filter, frontend,
    backend, optional visualiser and trajectory filler around one shared DepthVideo."""

    def __init__(self, args):
        """Build every component from `args`.

        Reads args.weights, args.disable_vis, args.image_size, args.buffer,
        args.stereo and args.filter_thresh here; `args` is also handed to the
        frontend and backend constructors.
        """
        super(Droid, self).__init__()
        self.load_weights(args.weights)
        self.args = args
        self.disable_vis = args.disable_vis

        # store images, depth, poses, intrinsics (shared between processes)
        self.video = DepthVideo(args.image_size, args.buffer, stereo=args.stereo)

        # filter incoming frames so that there is enough motion
        self.filterx = MotionFilter(self.net, self.video, thresh=args.filter_thresh)

        # frontend process
        self.frontend = DroidFrontend(self.net, self.video, self.args)

        # backend process
        self.backend = DroidBackend(self.net, self.video, self.args)

        # visualizer
        if not self.disable_vis:
            # from visualization import droid_visualization
            from vis_headless import droid_visualization
            print('Using headless ...')
            self.visualizer = Process(target=droid_visualization, args=(self.video, '.'))
            self.visualizer.start()

        # post processor - fill in poses for non-keyframes
        self.traj_filler = PoseTrajectoryFiller(self.net, self.video)

    def load_weights(self, weights):
        """Create the network, load the checkpoint at `weights`, move it to cuda:0 in eval mode."""

        self.net = DroidNet()

        # Deserialise on the host so a checkpoint saved from a GPU index that does
        # not exist on this machine still loads; the model is moved to cuda:0 below.
        checkpoint = torch.load(weights, map_location="cpu")

        # strip the "module." prefix from every parameter name
        state_dict = OrderedDict(
            (k.replace("module.", ""), v) for (k, v) in checkpoint.items())

        # keep only the first two output channels of the final weight/delta layers
        for key in ("update.weight.2.weight", "update.weight.2.bias",
                    "update.delta.2.weight", "update.delta.2.bias"):
            state_dict[key] = state_dict[key][:2]

        self.net.load_state_dict(state_dict)
        self.net.to("cuda:0").eval()

    def _release_frontend(self):
        """Drop the frontend if it is still attached; safe to call repeatedly."""
        if hasattr(self, "frontend"):
            del self.frontend

    def track(self, tstamp, image, depth=None, intrinsics=None, mask=None):
        """Main thread - feed one frame to the motion filter, then run the frontend."""

        with torch.no_grad():
            # check there is enough motion
            self.filterx.track(tstamp, image, depth, intrinsics, mask)

            # local bundle adjustment
            self.frontend()

            # global bundle adjustment is not run per frame here (see terminate)
            # self.backend()

    def terminate(self, stream=None, backend=True):
        """Finish the run and return the filled-in camera trajectory as a numpy array.

        Releases the frontend, optionally runs the backend twice (7 steps, then
        12 steps), then calls the trajectory filler on `stream` and returns the
        inverse of its result. NOTE(review): the row layout is presumably
        [t, q] -- confirm. The visualiser process is not touched here.
        """

        self._release_frontend()

        if backend:
            torch.cuda.empty_cache()
            # print("#" * 32)
            self.backend(7)

            torch.cuda.empty_cache()
            # print("#" * 32)
            self.backend(12)

        camera_trajectory = self.traj_filler(stream)
        return camera_trajectory.inv().data.cpu().numpy()

    def compute_error(self):
        """Release the frontend, run a 12-step backend pass and return its last recorded error."""

        self._release_frontend()

        torch.cuda.empty_cache()
        self.backend(12)

        return self.backend.errors[-1]
|
101 |
+
|
102 |
+
|
thirdparty/DROID-SLAM/droid_slam/droid_backend.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import lietorch
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from lietorch import SE3
|
6 |
+
from factor_graph import FactorGraph
|
7 |
+
|
8 |
+
|
9 |
+
class DroidBackend:
    """Runs an optimisation pass over all keyframes currently held in the video buffer."""

    def __init__(self, net, video, args):
        """Keep a handle on the video and update operator and copy the backend settings from `args`."""
        self.video = video
        self.update_op = net.update

        # global optimization window
        self.t0 = self.t1 = 0

        self.upsample = args.upsample
        self.beta = args.beta
        self.backend_thresh = args.backend_thresh
        self.backend_radius = args.backend_radius
        self.backend_nms = args.backend_nms

        # one entry is appended per call
        self.errors = []

    @torch.no_grad()
    def __call__(self, steps=12):
        """Build a fresh factor graph, optimise it for `steps` iterations and log the error."""

        video = self.video
        num_frames = video.counter.value

        # rescale first when running monocular and no sensor disparity is present
        if not video.stereo and not torch.any(video.disps_sens):
            video.normalize()

        graph = FactorGraph(
            video,
            self.update_op,
            corr_impl="alt",
            max_factors=16*num_frames,
            upsample=self.upsample)

        graph.add_proximity_factors(
            rad=self.backend_radius,
            nms=self.backend_nms,
            thresh=self.backend_thresh,
            beta=self.beta)

        graph.update_lowmem(steps=steps)

        reprojection_error = self.cal_err(graph)
        self.errors.append(reprojection_error)

        graph.clear_edges()
        video.dirty[:num_frames] = True

        return

    def cal_err(self, graph):
        """Mean norm of the gap between the graph targets and the reprojected coordinates."""
        projected, _ = graph.video.reproject(graph.ii, graph.jj)
        residual = graph.target - projected
        return residual.norm(dim=-1).mean().item()
|
52 |
+
|
thirdparty/DROID-SLAM/droid_slam/droid_frontend.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import lietorch
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from lietorch import SE3
|
6 |
+
from factor_graph import FactorGraph
|
7 |
+
|
8 |
+
|
9 |
+
class DroidFrontend:
    """Local optimisation over a sliding window of recent keyframes.

    The first call that finds exactly `warmup` frames in the video initialises
    the system; later calls run one update whenever new frames are available.
    """

    def __init__(self, net, video, args):
        """Create the local factor graph (at most 48 factors) and copy the frontend settings from `args`."""
        self.video = video
        self.update_op = net.update
        self.graph = FactorGraph(video, net.update, max_factors=48, upsample=args.upsample)

        # local optimization window
        self.t0 = 0
        self.t1 = 0

        # frontend variables
        self.is_initialized = False
        self.count = 0

        # factors older than max_age are deactivated; iters1/iters2 are the
        # number of graph updates before / after the keyframe test
        self.max_age = 25
        self.iters1 = 4
        self.iters2 = 2

        self.warmup = args.warmup
        self.beta = args.beta
        self.frontend_nms = args.frontend_nms
        self.keyframe_thresh = args.keyframe_thresh
        self.frontend_window = args.frontend_window
        self.frontend_thresh = args.frontend_thresh
        self.frontend_radius = args.frontend_radius

    def __update(self):
        """Add edges for the newest frame, optimise, then keep or drop the previous keyframe."""

        self.count += 1
        self.t1 += 1

        # retire factors that have been in the graph for too long
        if self.graph.corr is not None:
            self.graph.rm_factors(self.graph.age > self.max_age, store=True)

        self.graph.add_proximity_factors(self.t1-5, max(self.t1-self.frontend_window, 0),
            rad=self.frontend_radius, nms=self.frontend_nms, thresh=self.frontend_thresh, beta=self.beta, remove=True)

        # prefer the sensor disparity wherever it is positive
        self.video.disps[self.t1-1] = torch.where(self.video.disps_sens[self.t1-1] > 0,
            self.video.disps_sens[self.t1-1], self.video.disps[self.t1-1])

        for itr in range(self.iters1):
            self.graph.update(None, None, use_inactive=True)

        # distance between the two frames preceding the newest one
        d = self.video.distance([self.t1-3], [self.t1-2], beta=self.beta, bidirectional=True)

        if d.item() < self.keyframe_thresh:
            # too little motion: drop frame t1-2 and shrink the buffer by one
            self.graph.rm_keyframe(self.t1 - 2)

            with self.video.get_lock():
                self.video.counter.value -= 1
                self.t1 -= 1

        else:
            for itr in range(self.iters2):
                self.graph.update(None, None, use_inactive=True)

        # set pose for next iteration
        self.video.poses[self.t1] = self.video.poses[self.t1-1]
        self.video.disps[self.t1] = self.video.disps[self.t1-1].mean()

        # update visualization
        self.video.dirty[self.graph.ii.min():self.t1] = True

    def __initialize(self):
        """Initialize the SLAM system from the frames collected so far."""

        self.t0 = 0
        self.t1 = self.video.counter.value

        self.graph.add_neighborhood_factors(self.t0, self.t1, r=3)

        for itr in range(8):
            self.graph.update(1, use_inactive=True)

        self.graph.add_proximity_factors(0, 0, rad=2, nms=2, thresh=self.frontend_thresh, remove=False)

        for itr in range(8):
            self.graph.update(1, use_inactive=True)


        # self.video.normalize()
        # seed the next slot from the latest pose and the mean of the last four disparity maps
        self.video.poses[self.t1] = self.video.poses[self.t1-1].clone()
        self.video.disps[self.t1] = self.video.disps[self.t1-4:self.t1].mean()

        # initialization complete
        self.is_initialized = True
        self.last_pose = self.video.poses[self.t1-1].clone()
        self.last_disp = self.video.disps[self.t1-1].clone()
        self.last_time = self.video.tstamp[self.t1-1].clone()

        with self.video.get_lock():
            self.video.ready.value = 1
            self.video.dirty[:self.t1] = True

        self.graph.rm_factors(self.graph.ii < self.warmup-4, store=True)

    def __call__(self):
        """Main update: initialise once `warmup` frames exist, afterwards update on new frames."""

        # do initialization
        if not self.is_initialized and self.video.counter.value == self.warmup:
            self.__initialize()

        # do update
        elif self.is_initialized and self.t1 < self.video.counter.value:
            self.__update()
|
118 |
+
|
119 |
+
|
thirdparty/DROID-SLAM/droid_slam/droid_net.py
ADDED
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch.nn.functional as F
|
5 |
+
from collections import OrderedDict
|
6 |
+
|
7 |
+
from modules.extractor import BasicEncoder
|
8 |
+
from modules.corr import CorrBlock
|
9 |
+
from modules.gru import ConvGRU
|
10 |
+
from modules.clipping import GradientClip
|
11 |
+
|
12 |
+
from lietorch import SE3
|
13 |
+
from geom.ba import BA
|
14 |
+
|
15 |
+
import geom.projective_ops as pops
|
16 |
+
from geom.graph_utils import graph_to_edge_list, keyframe_indicies
|
17 |
+
|
18 |
+
from torch_scatter import scatter_mean
|
19 |
+
|
20 |
+
|
21 |
+
def cvx_upsample(data, mask):
    """Upsample a (batch, ht, wd, dim) field by 8x in both spatial directions.

    Each output value is a softmax-weighted blend of the 3x3 neighbourhood of
    its source pixel (zero padded at the border); `mask` supplies the 9 logits
    for each of the 8x8 sub-positions of every source pixel.
    Returns a (batch, 8*ht, 8*wd, dim) tensor.
    """
    n, rows, cols, chans = data.shape

    # channels-first layout, as expected by unfold
    field = data.permute(0, 3, 1, 2)

    # normalise over the nine neighbours
    weights = torch.softmax(mask.view(n, 1, 9, 8, 8, rows, cols), dim=2)

    # gather the 3x3 neighbourhood of every pixel
    patches = F.unfold(field, [3,3], padding=1).view(n, chans, 9, 1, 1, rows, cols)

    blended = (weights * patches).sum(dim=2)

    # interleave the 8x8 sub-positions with the coarse grid
    blended = blended.permute(0, 4, 2, 5, 3, 1)
    return blended.reshape(n, 8*rows, 8*cols, chans)
|
36 |
+
|
37 |
+
def upsample_disp(disp, mask):
    """Upsample a (batch, num, ht, wd) disparity stack to (batch, num, 8*ht, 8*wd) via cvx_upsample."""
    batch, num, ht, wd = disp.shape
    frames = batch*num

    # fold batch and frame axes together and add a trailing channel axis
    flat_disp = disp.view(frames, ht, wd, 1)
    flat_mask = mask.view(frames, -1, ht, wd)

    upsampled = cvx_upsample(flat_disp, flat_mask)
    return upsampled.view(batch, num, 8*ht, 8*wd)
|
42 |
+
|
43 |
+
|
44 |
+
class GraphAgg(nn.Module):
    """Pools per-edge hidden states by source frame and predicts a damping map
    (`eta`) and an upsampling mask for every distinct frame."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)

        # single positive channel
        self.eta = nn.Sequential(
            nn.Conv2d(128, 1, kernel_size=3, padding=1),
            GradientClip(),
            nn.Softplus())

        # 9 logits for each of the 8x8 sub-positions
        self.upmask = nn.Sequential(
            nn.Conv2d(128, 8*8*9, kernel_size=1, padding=0))

    def forward(self, net, ii):
        """net: (batch, num, ch, ht, wd) per-edge features; ii: frame index of each edge.

        Returns (0.01 * eta, upmask), each with one entry per distinct value in `ii`.
        """
        batch, num, ch, ht, wd = net.shape

        # position of every edge's frame among the distinct frames
        _, group = torch.unique(ii, return_inverse=True)

        hidden = self.conv1(net.view(batch*num, ch, ht, wd))
        hidden = self.relu(hidden).view(batch, num, 128, ht, wd)

        # average all edges that share a source frame
        pooled = scatter_mean(hidden, group, dim=1).view(-1, 128, ht, wd)
        pooled = self.relu(self.conv2(pooled))

        eta = self.eta(pooled).view(batch, -1, ht, wd)
        upmask = self.upmask(pooled).view(batch, -1, 8*8*9, ht, wd)

        return .01 * eta, upmask
|
76 |
+
|
77 |
+
|
78 |
+
class UpdateModule(nn.Module):
    """Recurrent update operator: encodes correlation and motion features, advances
    the hidden state with a ConvGRU and predicts a 2-channel delta and weight per edge."""

    def __init__(self):
        super().__init__()
        levels, radius = 4, 3
        cor_planes = levels * (2*radius + 1)**2

        self.corr_encoder = nn.Sequential(
            nn.Conv2d(cor_planes, 128, kernel_size=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True))

        self.flow_encoder = nn.Sequential(
            nn.Conv2d(4, 128, kernel_size=7, padding=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True))

        self.weight = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 2, kernel_size=3, padding=1),
            GradientClip(),
            nn.Sigmoid())

        self.delta = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 2, kernel_size=3, padding=1),
            GradientClip())

        self.gru = ConvGRU(128, 128+128+64)
        self.agg = GraphAgg()

    def forward(self, net, inp, corr, flow=None, ii=None, jj=None, mask=None):
        """One update step.

        net: (batch, num, ch, ht, wd) hidden state; `inp`, `corr` and `flow` are
        reshaped to the same leading layout. A missing `flow` is replaced by zeros
        with 4 channels. `jj` and `mask` are accepted but not used here.

        Returns (net, delta, weight), plus (eta, upmask) from the aggregator when
        `ii` is given.
        """
        batch, num, ch, ht, wd = net.shape

        if flow is None:
            flow = torch.zeros(batch, num, 4, ht, wd, device=net.device)

        out_shape = (batch, num, -1, ht, wd)

        def fold(t):
            # merge the batch and edge axes so the 2-D convolutions can run
            return t.view(batch*num, -1, ht, wd)

        net, inp, corr, flow = fold(net), fold(inp), fold(corr), fold(flow)

        corr_feat = self.corr_encoder(corr)
        flow_feat = self.flow_encoder(flow)
        net = self.gru(net, inp, corr_feat, flow_feat)

        ### update variables ###
        delta = self.delta(net).view(*out_shape)
        weight = self.weight(net).view(*out_shape)

        # channels-last, keeping the first two channels
        delta = delta.permute(0,1,3,4,2)[...,:2].contiguous()
        weight = weight.permute(0,1,3,4,2)[...,:2].contiguous()

        net = net.view(*out_shape)

        if ii is None:
            return net, delta, weight

        eta, upmask = self.agg(net, ii.to(net.device))
        return net, delta, weight, eta, upmask
|
148 |
+
|
149 |
+
|
150 |
+
class DroidNet(nn.Module):
    """Feature encoder, context encoder and recurrent update operator bundled into one module."""

    def __init__(self):
        super(DroidNet, self).__init__()
        self.fnet = BasicEncoder(output_dim=128, norm_fn='instance')
        self.cnet = BasicEncoder(output_dim=256, norm_fn='none')
        self.update = UpdateModule()


    def extract_features(self, images):
        """Run the feature extraction networks.

        Returns (fmaps, net, inp): the feature maps from `fnet`, and the two
        128-channel halves of the `cnet` output after tanh and relu respectively.
        """

        # normalize images: reverse the channel order, scale to [0, 1], then
        # standardise with the per-channel mean / std below
        images = images[:, :, [2,1,0]] / 255.0
        mean = torch.as_tensor([0.485, 0.456, 0.406], device=images.device)
        std = torch.as_tensor([0.229, 0.224, 0.225], device=images.device)
        images = images.sub_(mean[:, None, None]).div_(std[:, None, None])

        fmaps = self.fnet(images)
        net = self.cnet(images)

        net, inp = net.split([128,128], dim=2)
        net = torch.tanh(net)
        inp = torch.relu(inp)
        return fmaps, net, inp


    def forward(self, Gs, images, disps, intrinsics, graph=None, num_steps=12, fixedp=2):
        """Estimates SE3 or Sim3 between pair of frames.

        Runs `num_steps` update iterations; each predicts a correction to the
        reprojected coordinates and then applies two bundle-adjustment steps.
        `fixedp` is forwarded to BA.

        Returns three lists with one entry per step: poses, upsampled
        disparities, and masked residuals.
        """

        # NOTE(review): `u` and `kk` are not used further down in this method.
        u = keyframe_indicies(graph)
        ii, jj, kk = graph_to_edge_list(graph)

        ii = ii.to(device=images.device, dtype=torch.long)
        jj = jj.to(device=images.device, dtype=torch.long)

        fmaps, net, inp = self.extract_features(images)
        net, inp = net[:,ii], inp[:,ii]
        corr_fn = CorrBlock(fmaps[:,ii], fmaps[:,jj], num_levels=4, radius=3)

        ht, wd = images.shape[-2:]
        coords0 = pops.coords_grid(ht//8, wd//8, device=images.device)

        coords1, _ = pops.projective_transform(Gs, disps, intrinsics, ii, jj)
        target = coords1.clone()

        Gs_list, disp_list, residual_list = [], [], []
        for step in range(num_steps):
            # gradients do not flow across iterations
            Gs = Gs.detach()
            disps = disps.detach()
            coords1 = coords1.detach()
            target = target.detach()

            # extract motion features
            corr = corr_fn(coords1)
            resd = target - coords1
            flow = coords1 - coords0

            motion = torch.cat([flow, resd], dim=-1)
            motion = motion.permute(0,1,4,2,3).clamp(-64.0, 64.0)

            net, delta, weight, eta, upmask = \
                self.update(net, inp, corr, motion, ii, jj)

            target = coords1 + delta

            for i in range(2):
                # honour the caller's `fixedp` instead of a hard-coded 2
                Gs, disps = BA(target, weight, eta, Gs, disps, intrinsics, ii, jj, fixedp=fixedp)

            coords1, valid_mask = pops.projective_transform(Gs, disps, intrinsics, ii, jj)
            residual = (target - coords1)

            Gs_list.append(Gs)
            disp_list.append(upsample_disp(disps, upmask))
            residual_list.append(valid_mask * residual)


        return Gs_list, disp_list, residual_list
|
thirdparty/DROID-SLAM/droid_slam/factor_graph.py
ADDED
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import lietorch
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
from lietorch import SE3
|
7 |
+
from modules.corr import CorrBlock, AltCorrBlock
|
8 |
+
import geom.projective_ops as pops
|
9 |
+
from glob import glob
|
10 |
+
|
11 |
+
class FactorGraph:
|
12 |
+
def __init__(self, video, update_op, device="cuda:0", corr_impl="volume", max_factors=-1, upsample=False):
    """Create an empty factor graph over `video`.

    All edge lists and per-edge target/weight tensors start with zero edges;
    spatial sizes are the video size divided by 8.
    """
    self.video = video
    self.update_op = update_op
    self.device = device
    self.max_factors = max_factors
    self.corr_impl = corr_impl
    self.upsample = upsample

    # operator at 1/8 resolution
    ht, wd = video.ht // 8, video.wd // 8
    self.ht, self.wd = ht, wd

    def no_edges():
        # fresh empty int64 index vector
        return torch.as_tensor([], dtype=torch.long, device=device)

    def no_fields():
        # fresh per-edge 2-channel field holding zero edges
        return torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)

    self.coords0 = pops.coords_grid(ht, wd, device=device)
    self.ii = no_edges()
    self.jj = no_edges()
    self.age = no_edges()

    self.corr = self.net = self.inp = None
    self.damping = 1e-6 * torch.ones_like(self.video.disps)

    self.target = no_fields()
    self.weight = no_fields()

    # inactive factors
    self.ii_inac = no_edges()
    self.jj_inac = no_edges()
    self.ii_bad = no_edges()
    self.jj_bad = no_edges()

    self.target_inac = no_fields()
    self.weight_inac = no_fields()
|
43 |
+
|
44 |
+
def __filter_repeated_edges(self, ii, jj):
|
45 |
+
""" remove duplicate edges """
|
46 |
+
|
47 |
+
keep = torch.zeros(ii.shape[0], dtype=torch.bool, device=ii.device)
|
48 |
+
eset = set(
|
49 |
+
[(i.item(), j.item()) for i, j in zip(self.ii, self.jj)] +
|
50 |
+
[(i.item(), j.item()) for i, j in zip(self.ii_inac, self.jj_inac)])
|
51 |
+
|
52 |
+
for k, (i, j) in enumerate(zip(ii, jj)):
|
53 |
+
keep[k] = (i.item(), j.item()) not in eset
|
54 |
+
|
55 |
+
return ii[keep], jj[keep]
|
56 |
+
|
57 |
+
def print_edges(self):
    """ print each active edge as (i, j, mean weight), ordered by source index i """
    src = self.ii.cpu().numpy()
    dst = self.jj.cpu().numpy()
    order = np.argsort(src)

    # average confidence of every edge over all non-edge dimensions
    mean_w = torch.mean(self.weight, dim=[0,2,3,4]).cpu().numpy()

    for edge in zip(src[order], dst[order], mean_w[order]):
        print(edge)
    print()
|
70 |
+
|
71 |
+
def filter_edges(self):
    """ remove bad edges

    An edge is bad when its frames are more than 2 indices apart and its mean
    weight is below 0.001. Bad edges are recorded in ii_bad / jj_bad and then
    removed without being stored as inactive.
    """
    mean_conf = self.weight.mean(dim=[0,2,3,4])
    far_apart = (self.ii - self.jj).abs() > 2
    mask = far_apart & (mean_conf < 0.001)

    self.ii_bad = torch.cat([self.ii_bad, self.ii[mask]])
    self.jj_bad = torch.cat([self.jj_bad, self.jj[mask]])
    self.rm_factors(mask, store=False)
|
79 |
+
|
80 |
+
def clear_edges(self):
    """ remove every active edge and reset the cached net / inp state """
    every_edge = self.ii >= 0
    self.rm_factors(every_edge)
    self.net, self.inp = None, None
|
84 |
+
|
85 |
+
@torch.cuda.amp.autocast(enabled=True)
def add_factors(self, ii, jj, remove=False):
    """ add edges to factor graph

    Args:
        ii, jj: source / destination frame indices of the new edges, as
            tensors or anything torch.as_tensor accepts.
        remove: when True, and the edge budget would be exceeded, existing
            edges are moved to the inactive set before the new ones are added.
    """

    if not isinstance(ii, torch.Tensor):
        ii = torch.as_tensor(ii, dtype=torch.long, device=self.device)

    if not isinstance(jj, torch.Tensor):
        jj = torch.as_tensor(jj, dtype=torch.long, device=self.device)

    # remove duplicate edges
    ii, jj = self.__filter_repeated_edges(ii, jj)

    num_new = ii.shape[0]
    if num_new == 0:
        return

    # place limit on number of factors
    over_budget = self.max_factors > 0 and self.ii.shape[0] + num_new > self.max_factors
    if over_budget and self.corr is not None and remove:
        # NOTE(review): `ix` is the argsort permutation of the ages, not each
        # edge's age rank -- confirm this mask selects the intended edges.
        ix = torch.arange(len(self.age))[torch.argsort(self.age).cpu()]
        self.rm_factors(ix >= self.max_factors - num_new, store=True)

    net = self.video.nets[ii].to(self.device).unsqueeze(0)

    # correlation volume for new edges
    if self.corr_impl == "volume":
        # an edge with ii == jj reads index 1 of the second fmaps axis
        c = (ii == jj).long()
        fmap1 = self.video.fmaps[ii,0].to(self.device).unsqueeze(0)
        fmap2 = self.video.fmaps[jj,c].to(self.device).unsqueeze(0)
        corr = CorrBlock(fmap1, fmap2)
        self.corr = corr if self.corr is None else self.corr.cat(corr)

        inp = self.video.inps[ii].to(self.device).unsqueeze(0)
        self.inp = inp if self.inp is None else torch.cat([self.inp, inp], 1)

    # initial target is the current reprojection; weights start at zero
    with torch.cuda.amp.autocast(enabled=False):
        target, _ = self.video.reproject(ii, jj)
        weight = torch.zeros_like(target)

    self.ii = torch.cat([self.ii, ii], 0)
    self.jj = torch.cat([self.jj, jj], 0)
    self.age = torch.cat([self.age, torch.zeros_like(ii)], 0)

    # reprojection factors
    self.net = net if self.net is None else torch.cat([self.net, net], 1)

    self.target = torch.cat([self.target, target], 1)
    self.weight = torch.cat([self.weight, weight], 1)
|
135 |
+
|
136 |
+
@torch.cuda.amp.autocast(enabled=True)
def rm_factors(self, mask, store=False):
    """ drop edges from factor graph

    Args:
        mask: boolean tensor with one entry per active edge; True marks an
            edge to remove.
        store: when True, the removed edges (indices, target and weight) are
            appended to the inactive buffers before being dropped.
    """
    keep = ~mask

    # store estimated factors
    if store:
        self.ii_inac = torch.cat([self.ii_inac, self.ii[mask]], 0)
        self.jj_inac = torch.cat([self.jj_inac, self.jj[mask]], 0)
        self.target_inac = torch.cat([self.target_inac, self.target[:,mask]], 1)
        self.weight_inac = torch.cat([self.weight_inac, self.weight[:,mask]], 1)

    self.ii = self.ii[keep]
    self.jj = self.jj[keep]
    self.age = self.age[keep]

    # self.corr stays None until the first correlation volume is built, so a
    # removal on a graph that never received edges must not index into it.
    if self.corr_impl == "volume" and self.corr is not None:
        self.corr = self.corr[keep]

    if self.net is not None:
        self.net = self.net[:,keep]

    if self.inp is not None:
        self.inp = self.inp[:,keep]

    self.target = self.target[:,keep]
    self.weight = self.weight[:,keep]
|
162 |
+
|
163 |
+
|
164 |
+
@torch.cuda.amp.autocast(enabled=True)
def rm_keyframe(self, ix):
    """ remove keyframe `ix` from the video buffers and the factor graph

    Slot `ix` of each per-frame video buffer is overwritten with slot
    `ix + 1`. Every edge touching `ix` is dropped (active and inactive), and
    all frame indices >= `ix` in the edge lists are decremented by one.
    """
    video = self.video

    with video.get_lock():
        for name in ("images", "poses", "disps", "disps_sens", "intrinsics",
                     "nets", "inps", "fmaps", "tstamp", "masks"):
            buf = getattr(video, name)
            buf[ix] = buf[ix+1]

    # inactive edges: mark those touching ix before the indices are shifted
    touched = (self.ii_inac == ix) | (self.jj_inac == ix)
    self.ii_inac[self.ii_inac >= ix] -= 1
    self.jj_inac[self.jj_inac >= ix] -= 1

    if torch.any(touched):
        kept = ~touched
        self.ii_inac = self.ii_inac[kept]
        self.jj_inac = self.jj_inac[kept]
        self.target_inac = self.target_inac[:,kept]
        self.weight_inac = self.weight_inac[:,kept]

    # active edges: same procedure, removal delegated to rm_factors
    touched = (self.ii == ix) | (self.jj == ix)

    self.ii[self.ii >= ix] -= 1
    self.jj[self.jj >= ix] -= 1
    self.rm_factors(touched, store=False)
|
197 |
+
|
198 |
+
|
199 |
+
@torch.cuda.amp.autocast(enabled=True)
def update(self, t0=None, t1=None, itrs=3, use_inactive=False, EP=1e-7, motion_only=False):
    """ run update operator on factor graph

    One pass of: reproject -> update operator -> dense bundle adjustment.

    Args:
        t0, t1: forwarded to `video.ba`; when t0 is None it becomes
            max(1, min(self.ii) + 1).
        itrs: forwarded to `video.ba`.
        use_inactive: also feed stored inactive edges whose two frame
            indices are both >= t0 - 3 into bundle adjustment.
        EP: constant added to the scaled damping.
        motion_only: forwarded to `video.ba`.
    """

    # motion features
    with torch.cuda.amp.autocast(enabled=False):
        coords1, mask = self.video.reproject(self.ii, self.jj)
        flow = coords1 - self.coords0
        resid = self.target - coords1
        motn = torch.cat([flow, resid], dim=-1)
        motn = motn.permute(0,1,4,2,3).clamp(-64.0, 64.0)

    # correlation features
    corr = self.corr(coords1)
    self.net, delta, weight, damping, upmask = \
        self.update_op(self.net, self.inp, corr, motn, self.ii, self.jj)

    # Shapes:
    #   weight: [1, k, h//8, w//8, 2]
    #   self.ii: [k]; self.jj: [k]
    # zero the weight wherever the source frame's mask is positive
    msk = self.video.masks[self.ii] > 0
    weight[:,msk] = 0.0

    if t0 is None:
        t0 = max(1, self.ii.min().item()+1)

    with torch.cuda.amp.autocast(enabled=False):
        self.target = coords1 + delta.to(dtype=torch.float)
        self.weight = weight.to(dtype=torch.float)

        ht, wd = self.coords0.shape[0:2]
        self.damping[torch.unique(self.ii)] = damping

        if use_inactive:
            recent = (self.ii_inac >= t0 - 3) & (self.jj_inac >= t0 - 3)
            ii = torch.cat([self.ii_inac[recent], self.ii], 0)
            jj = torch.cat([self.jj_inac[recent], self.jj], 0)
            target = torch.cat([self.target_inac[:,recent], self.target], 1)
            weight = torch.cat([self.weight_inac[:,recent], self.weight], 1)
        else:
            ii, jj = self.ii, self.jj
            target, weight = self.target, self.weight

        damping = .2 * self.damping[torch.unique(ii)].contiguous() + EP

        # edge-major, channel-first layout for the BA call
        target = target.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
        weight = weight.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()

        # dense bundle adjustment
        self.video.ba(target, weight, damping, ii, jj, t0, t1,
            itrs=itrs, lm=1e-4, ep=0.1, motion_only=motion_only)

        if self.upsample:
            self.video.upsample(torch.unique(self.ii), upmask)

    self.age += 1
|
262 |
+
|
263 |
+
|
264 |
+
@torch.cuda.amp.autocast(enabled=False)
def update_lowmem(self, t0=None, t1=None, itrs=2, use_inactive=False, EP=1e-7, steps=8):
    """ run update operator on factor graph - reduced memory implementation

    Repeats `steps` rounds. Each round runs the update operator over the
    edges in chunks grouped by source frame index, then one dense bundle
    adjustment over frames 1..t, where t is the current `video.counter`
    value. `t0`, `t1` and `use_inactive` are accepted but not read here.
    """

    # alternate corr implementation
    t = self.video.counter.value

    num, rig, ch, ht, wd = self.video.fmaps.shape
    corr_op = AltCorrBlock(self.video.fmaps.view(1, num*rig, ch, ht, wd))

    print("Global BA Iteration with {} steps".format(steps))
    for step in range(steps):
        with torch.cuda.amp.autocast(enabled=False):
            coords1, mask = self.video.reproject(self.ii, self.jj)
            flow = coords1 - self.coords0
            resid = self.target - coords1
            motn = torch.cat([flow, resid], dim=-1)
            motn = motn.permute(0,1,4,2,3).clamp(-64.0, 64.0)

        # process edges whose source frame lies in [start, start + chunk)
        chunk = 8
        for start in range(0, self.jj.max()+1, chunk):
            sel = (self.ii >= start) & (self.ii < start + chunk)
            iis = self.ii[sel]
            jjs = self.jj[sel]

            ht, wd = self.coords0.shape[0:2]
            corr1 = corr_op(coords1[:,sel], rig * iis, rig * jjs + (iis == jjs).long())

            with torch.cuda.amp.autocast(enabled=True):

                net, delta, weight, damping, upmask = \
                    self.update_op(self.net[:,sel], self.video.inps[None,iis], corr1, motn[:,sel], iis, jjs)

                if self.upsample:
                    self.video.upsample(torch.unique(iis), upmask)

            # Shapes:
            #   weight: [1, k, h//8, w//8, 2]
            #   self.ii: [k]; self.jj: [k]
            # zero the weight wherever the source frame's mask is positive
            msk = self.video.masks[iis] > 0
            weight[:,msk] = 0.0

            self.net[:,sel] = net
            self.target[:,sel] = coords1[:,sel] + delta.float()
            self.weight[:,sel] = weight.float()
            self.damping[torch.unique(iis)] = damping

        damping = .2 * self.damping[torch.unique(self.ii)].contiguous() + EP
        target = self.target.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
        weight = self.weight.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()

        # dense bundle adjustment
        self.video.ba(target, weight, damping, self.ii, self.jj, 1, t,
            itrs=itrs, lm=1e-5, ep=1e-2, motion_only=False)

        self.video.dirty[:t] = True
|
319 |
+
|
320 |
+
def add_neighborhood_factors(self, t0, t1, r=3):
    """ add edges between neighboring frames within radius r

    Considers every ordered pair of frames in [t0, t1). A pair is kept when
    its index gap is at most r and greater than 1 if `video.stereo` is set,
    otherwise greater than 0.
    """
    frames = torch.arange(t0, t1)
    ii, jj = torch.meshgrid(frames, frames, indexing='ij')
    ii = ii.reshape(-1).to(dtype=torch.long, device=self.device)
    jj = jj.reshape(-1).to(dtype=torch.long, device=self.device)

    min_gap = 1 if self.video.stereo else 0

    gap = (ii - jj).abs()
    keep = (gap > min_gap) & (gap <= r)
    self.add_factors(ii[keep], jj[keep])
|
331 |
+
|
332 |
+
|
333 |
+
def add_proximity_factors(self, t0=0, t1=0, rad=2, nms=2, beta=0.25, thresh=16.0, remove=False):
    """ add edges to the factor graph based on distance

    Candidate pairs are (i, j) with i in [t0, t) and j in [t1, t), where t is
    the current `video.counter` value; `video.distance` scores them. Pairs
    within `rad` frames of each other are always connected in both
    directions. Remaining candidates are added greedily in order of
    increasing distance while the distance is <= `thresh`, with
    non-maximum suppression of radius `nms` around every chosen or already
    known edge.

    Args:
        t0, t1: first source / destination frame considered.
        rad: temporal radius of the always-added neighbourhood edges.
        nms: suppression radius (Manhattan distance in (i, j) space).
        beta: forwarded to `video.distance`.
        thresh: largest distance accepted for a proximity edge.
        remove: forwarded to `add_factors`.
    """

    t = self.video.counter.value
    ix = torch.arange(t0, t)
    jx = torch.arange(t1, t)

    ii, jj = torch.meshgrid(ix, jx, indexing='ij')
    ii = ii.reshape(-1)
    jj = jj.reshape(-1)

    # d is the flattened (t - t0) x (t - t1) distance grid
    d = self.video.distance(ii, jj, beta=beta)
    d[ii - rad < jj] = np.inf
    d[d > 100] = np.inf

    def suppress(i, j):
        # rule out candidates in a diamond around (i, j); the diamond shrinks
        # for temporally close pairs and vanishes when |i - j| <= 2
        radius = max(min(abs(i-j)-2, nms), 0)
        for di in range(-nms, nms+1):
            for dj in range(-nms, nms+1):
                if abs(di) + abs(dj) <= radius:
                    i1 = i + di
                    j1 = j + dj

                    if (t0 <= i1 < t) and (t1 <= j1 < t):
                        d[(i1-t0)*(t-t1) + (j1-t1)] = np.inf

    # suppress around every edge the graph already knows about
    ii1 = torch.cat([self.ii, self.ii_bad, self.ii_inac], 0)
    jj1 = torch.cat([self.jj, self.jj_bad, self.jj_inac], 0)
    for i, j in zip(ii1.tolist(), jj1.tolist()):
        suppress(i, j)

    es = []
    for i in range(t0, t):
        if self.video.stereo:
            es.append((i, i))
            # (i, i) has a slot in d only when i >= t1
            if i >= t1:
                d[(i-t0)*(t-t1) + (i-t1)] = np.inf

        for j in range(max(i-rad-1,0), i):
            es.append((i,j))
            es.append((j,i))
            # skip columns outside the grid; a negative column offset would
            # otherwise overwrite an unrelated entry of d
            if j >= t1:
                d[(i-t0)*(t-t1) + (j-t1)] = np.inf

    order = torch.argsort(d)
    for k in order:
        if d[k].item() > thresh:
            continue

        # max_factors <= 0 means "no limit", as in add_factors
        if self.max_factors > 0 and len(es) > self.max_factors:
            break

        i = ii[k].item()
        j = jj[k].item()

        # bidirectional
        es.append((i, j))
        es.append((j, i))

        suppress(i, j)

    if not es:
        return

    ii, jj = torch.as_tensor(es, device=self.device).unbind(dim=-1)
    self.add_factors(ii, jj, remove)
|
thirdparty/DROID-SLAM/droid_slam/geom/__init__.py
ADDED
File without changes
|
thirdparty/DROID-SLAM/droid_slam/geom/ba.py
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import lietorch
|
2 |
+
import torch
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
from .chol import block_solve, schur_solve
|
6 |
+
import geom.projective_ops as pops
|
7 |
+
|
8 |
+
from torch_scatter import scatter_sum
|
9 |
+
|
10 |
+
|
11 |
+
# utility functions for scattering ops
|
12 |
+
def safe_scatter_add_mat(A, ii, jj, n, m):
    """ sum the entries of A (along dim 1) into an n*m flat grid at cell ii*m + jj

    Entries whose (ii, jj) falls outside [0, n) x [0, m) are skipped.
    """
    in_range = (ii >= 0) & (jj >= 0) & (ii < n) & (jj < m)
    flat_index = ii[in_range]*m + jj[in_range]
    return scatter_sum(A[:,in_range], flat_index, dim=1, dim_size=n*m)
|
15 |
+
|
16 |
+
def safe_scatter_add_vec(b, ii, n):
    """ sum the entries of b (along dim 1) into n slots; indices outside [0, n) are skipped """
    in_range = (ii >= 0) & (ii < n)
    return scatter_sum(b[:,in_range], ii[in_range], dim=1, dim_size=n)
|
19 |
+
|
20 |
+
# apply retraction operator to inv-depth maps
|
21 |
+
def disp_retr(disps, dz, ii):
    """ add the updates dz, scattered along dim 1 by index ii, to disps """
    index = ii.to(device=dz.device)
    update = scatter_sum(dz, index, dim=1, dim_size=disps.shape[1])
    return disps + update
|
24 |
+
|
25 |
+
# apply retraction operator to poses
|
26 |
+
def pose_retr(poses, dx, ii):
    """ scatter the updates dx along dim 1 by index ii and apply them with poses.retr """
    index = ii.to(device=dx.device)
    update = scatter_sum(dx, index, dim=1, dim_size=poses.shape[1])
    return poses.retr(update)
|
29 |
+
|
30 |
+
|
31 |
+
def BA(target, weight, eta, poses, disps, intrinsics, ii, jj, fixedp=1, rig=1):
    """ Full Bundle Adjustment

    Builds the weighted normal equations from the reprojection residuals
    `target - coords` over the edges (ii, jj), solves them with
    `schur_solve`, and applies the resulting updates to `poses` (via
    `pose_retr`) and `disps` (via `disp_retr`). The first `fixedp` poses
    receive no update. Updated disparities above 10 are reset to zero and
    negative values are clamped to zero.

    Returns:
        (poses, disps) after the update.
    """

    B, P, ht, wd = disps.shape
    N = ii.shape[0]
    D = poses.manifold_dim
    npix = ht*wd

    ### 1: compute jacobians and residuals ###
    coords, valid, (Ji, Jj, Jz) = pops.projective_transform(
        poses, disps, intrinsics, ii, jj, jacobian=True)

    resid = (target - coords).view(B, N, -1, 1)
    wts = .001 * (valid * weight).view(B, N, -1, 1)

    ### 2: construct linear system ###
    Ji = Ji.reshape(B, N, -1, D)
    Jj = Jj.reshape(B, N, -1, D)
    wJiT = (wts * Ji).transpose(2,3)
    wJjT = (wts * Jj).transpose(2,3)

    Jz = Jz.reshape(B, N, npix, -1)

    # pose-pose blocks
    Hii = torch.matmul(wJiT, Ji)
    Hij = torch.matmul(wJiT, Jj)
    Hji = torch.matmul(wJjT, Ji)
    Hjj = torch.matmul(wJjT, Jj)

    # pose right-hand sides
    vi = torch.matmul(wJiT, resid).squeeze(-1)
    vj = torch.matmul(wJjT, resid).squeeze(-1)

    # pose-disparity coupling
    Ei = (wJiT.view(B,N,D,npix,-1) * Jz[:,:,None]).sum(dim=-1)
    Ej = (wJjT.view(B,N,D,npix,-1) * Jz[:,:,None]).sum(dim=-1)

    # per-pixel disparity terms
    wts = wts.view(B, N, npix, -1)
    resid = resid.view(B, N, npix, -1)
    wk = torch.sum(wts*resid*Jz, dim=-1)
    Ck = torch.sum(wts*Jz*Jz, dim=-1)

    # kx: distinct source frames, kk: position of each edge's source in kx
    kx, kk = torch.unique(ii, return_inverse=True)
    M = kx.shape[0]

    # only optimize keyframe poses
    P = P // rig - fixedp
    ii = ii // rig - fixedp
    jj = jj // rig - fixedp

    H = safe_scatter_add_mat(Hii, ii, ii, P, P) + \
        safe_scatter_add_mat(Hij, ii, jj, P, P) + \
        safe_scatter_add_mat(Hji, jj, ii, P, P) + \
        safe_scatter_add_mat(Hjj, jj, jj, P, P)

    E = safe_scatter_add_mat(Ei, ii, kk, P, M) + \
        safe_scatter_add_mat(Ej, jj, kk, P, M)

    v = safe_scatter_add_vec(vi, ii, P) + \
        safe_scatter_add_vec(vj, jj, P)

    C = safe_scatter_add_vec(Ck, kk, M)
    w = safe_scatter_add_vec(wk, kk, M)

    # damp the disparity diagonal
    C = C + eta.view(*C.shape) + 1e-7

    H = H.view(B, P, P, D, D)
    E = E.view(B, P, M, D, npix)

    ### 3: solve the system ###
    dx, dz = schur_solve(H, E, C, v, w)

    ### 4: apply retraction ###
    poses = pose_retr(poses, dx, torch.arange(P) + fixedp)
    disps = disp_retr(disps, dz.view(B,-1,ht,wd), kx)

    disps = torch.where(disps > 10, torch.zeros_like(disps), disps)
    disps = disps.clamp(min=0.0)

    return poses, disps
|
107 |
+
|
108 |
+
|
109 |
+
def MoBA(target, weight, eta, poses, disps, intrinsics, ii, jj, fixedp=1, rig=1):
    """ Motion only bundle adjustment

    Same residuals and pose blocks as the full problem, but only the poses
    are updated: the system is solved with `block_solve` and applied through
    `pose_retr`. The first `fixedp` poses receive no update. `eta` is
    accepted but not read.

    Returns:
        poses after the update.
    """

    B, P, ht, wd = disps.shape
    N = ii.shape[0]
    D = poses.manifold_dim

    ### 1: compute jacobians and residuals ###
    coords, valid, (Ji, Jj, Jz) = pops.projective_transform(
        poses, disps, intrinsics, ii, jj, jacobian=True)

    resid = (target - coords).view(B, N, -1, 1)
    wts = .001 * (valid * weight).view(B, N, -1, 1)

    ### 2: construct linear system ###
    Ji = Ji.reshape(B, N, -1, D)
    Jj = Jj.reshape(B, N, -1, D)
    wJiT = (wts * Ji).transpose(2,3)
    wJjT = (wts * Jj).transpose(2,3)

    # pose-pose blocks
    Hii = torch.matmul(wJiT, Ji)
    Hij = torch.matmul(wJiT, Jj)
    Hji = torch.matmul(wJjT, Ji)
    Hjj = torch.matmul(wJjT, Jj)

    # pose right-hand sides
    vi = torch.matmul(wJiT, resid).squeeze(-1)
    vj = torch.matmul(wJjT, resid).squeeze(-1)

    # only optimize keyframe poses
    P = P // rig - fixedp
    ii = ii // rig - fixedp
    jj = jj // rig - fixedp

    H = safe_scatter_add_mat(Hii, ii, ii, P, P) + \
        safe_scatter_add_mat(Hij, ii, jj, P, P) + \
        safe_scatter_add_mat(Hji, jj, ii, P, P) + \
        safe_scatter_add_mat(Hjj, jj, jj, P, P)

    v = safe_scatter_add_vec(vi, ii, P) + \
        safe_scatter_add_vec(vj, jj, P)

    H = H.view(B, P, P, D, D)

    ### 3: solve the system ###
    dx = block_solve(H, v)

    ### 4: apply retraction ###
    poses = pose_retr(poses, dx, torch.arange(P) + fixedp)
    return poses
|
158 |
+
|
thirdparty/DROID-SLAM/droid_slam/geom/chol.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import geom.projective_ops as pops
|
4 |
+
|
5 |
+
class CholeskySolver(torch.autograd.Function):
    """ differentiable solve of H x = b through a Cholesky factorization

    If the factorization (or the solve) raises, the error is printed, the
    forward pass returns zeros shaped like `b`, and the backward pass returns
    no gradients.
    """

    @staticmethod
    def forward(ctx, H, b):
        # don't crash training if cholesky decomp fails
        try:
            factor = torch.linalg.cholesky(H)
            solution = torch.cholesky_solve(b, factor)
            ctx.save_for_backward(factor, solution)
            ctx.failed = False
        except Exception as e:
            print(e)
            ctx.failed = True
            solution = torch.zeros_like(b)

        return solution

    @staticmethod
    def backward(ctx, grad_x):
        if ctx.failed:
            return None, None

        factor, solution = ctx.saved_tensors
        # gradient w.r.t. b reuses the stored factorization
        grad_b = torch.cholesky_solve(grad_x, factor)
        grad_H = -torch.matmul(solution, grad_b.transpose(-1,-2))

        return grad_H, grad_b
|
31 |
+
|
32 |
+
def block_solve(H, b, ep=0.1, lm=0.0001):
    """ solve normal equations

    Args:
        H: block matrix of shape (B, N, N, D, D).
        b: right-hand side, reshaped to (B, N*D, 1).
        ep, lm: additive and multiplicative damping terms.

    Returns:
        solution of shape (B, N, D).
    """
    B, N, _, D, _ = H.shape

    # NOTE(review): the damping is broadcast over every D x D block,
    # off-diagonal blocks included, whereas schur_solve damps only the
    # diagonal of the assembled matrix -- confirm this is intended.
    eye = torch.eye(D).to(H.device)
    damped = H + (ep + lm*H) * eye

    # (B, N, N, D, D) -> dense (B, N*D, N*D)
    dense = damped.permute(0,1,3,2,4).reshape(B, N*D, N*D)
    rhs = b.reshape(B, N*D, 1)

    solution = CholeskySolver.apply(dense, rhs)
    return solution.reshape(B, N, D)
|
44 |
+
|
45 |
+
|
46 |
+
def schur_solve(H, E, C, v, w, ep=0.1, lm=0.0001, sless=False):
    """ solve using Schur complement

    Args:
        H: block matrix of shape (B, P, P, D, D).
        E: coupling block of shape (B, P, M, D, HW).
        C: diagonal of the second block, inverted elementwise.
        v, w: right-hand sides of the first and second block.
        ep, lm: additive and multiplicative damping on the diagonal of H.
        sless: when True only the first-block solution is returned.

    Returns:
        dx of shape (B, P, D), or (dx, dz) with dz of shape (B, M, HW) when
        `sless` is False.
    """

    B, P, M, D, HW = E.shape
    H = H.permute(0,1,3,2,4).reshape(B, P*D, P*D)
    E = E.permute(0,1,3,2,4).reshape(B, P*D, M*HW)
    C_inv = (1.0 / C).view(B, M*HW, 1)

    # damping
    eye = torch.eye(P*D).to(H.device)
    H = H + (ep + lm*H) * eye

    v = v.reshape(B, P*D, 1)
    w = w.reshape(B, M*HW, 1)

    # eliminate the second block
    Et = E.transpose(1,2)
    S = H - torch.matmul(E, C_inv*Et)
    v = v - torch.matmul(E, C_inv*w)

    dx = CholeskySolver.apply(S, v)
    if sless:
        return dx.reshape(B, P, D)

    # back-substitute for the second block
    dz = C_inv * (w - Et @ dx)
    dx = dx.reshape(B, P, D)
    dz = dz.reshape(B, M, HW)

    return dx, dz
|
thirdparty/DROID-SLAM/droid_slam/geom/graph_utils.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
from collections import OrderedDict
|
5 |
+
|
6 |
+
import lietorch
|
7 |
+
from data_readers.rgbd_utils import compute_distance_matrix_flow, compute_distance_matrix_flow2
|
8 |
+
|
9 |
+
|
10 |
+
def graph_to_edge_list(graph):
    """ flatten an adjacency mapping {u: [v, ...]} into edge tensors

    Returns:
        (ii, jj, kk): source node, destination node, and the position of the
        source node in the mapping's iteration order, one entry per edge.
    """
    triples = [(u, v, s) for s, u in enumerate(graph) for v in graph[u]]

    ii = torch.as_tensor([u for u, _, _ in triples])
    jj = torch.as_tensor([v for _, v, _ in triples])
    kk = torch.as_tensor([s for _, _, s in triples])
    return ii, jj, kk
|
22 |
+
|
23 |
+
def keyframe_indicies(graph):
    """ return the keys of `graph`, in iteration order, as a tensor """
    return torch.as_tensor(list(graph))
|
25 |
+
|
26 |
+
def meshgrid(m, n, device='cuda'):
    """ return flattened row / column indices of an m x n grid, moved to `device` """
    rows, cols = torch.meshgrid(torch.arange(m), torch.arange(n), indexing='ij')
    flat_rows = rows.reshape(-1).to(device)
    flat_cols = cols.reshape(-1).to(device)
    return flat_rows, flat_cols
|
29 |
+
|
30 |
+
def neighbourhood_graph(n, r, device='cuda'):
    """ edges between all pairs of the n nodes whose index gap is in [1, r]

    Args:
        n: number of nodes.
        r: largest index gap that is still connected.
        device: device of the returned tensors; defaults to 'cuda', which is
            what the function used before the parameter existed.

    Returns:
        (ii, jj): source and destination indices of the kept edges.
    """
    rows, cols = torch.meshgrid(torch.arange(n), torch.arange(n), indexing='ij')
    ii = rows.reshape(-1).to(device)
    jj = cols.reshape(-1).to(device)

    gap = (ii - jj).abs()
    keep = (gap >= 1) & (gap <= r)
    return ii[keep], jj[keep]
|
35 |
+
|
36 |
+
|
37 |
+
def build_frame_graph(poses, disps, intrinsics, num=16, thresh=24.0, r=2):
    """ construct a frame graph between co-visible frames

    Every frame is first linked to its neighbours within `r` indices. While
    fewer than `num` edges exist, the pair with the smallest remaining
    distance is added, stopping at the first pair whose distance is not
    below `thresh`.

    Returns:
        OrderedDict mapping each frame index to its list of neighbours.
    """
    N = poses.shape[1]
    poses = poses[0].cpu().numpy()
    # subsample disparities by 8 and scale the intrinsics to match
    disps = disps[0][:,3::8,3::8].cpu().numpy()
    intrinsics = intrinsics[0].cpu().numpy() / 8.0
    d = compute_distance_matrix_flow(poses, disps, intrinsics)

    count = 0
    graph = OrderedDict()

    for i in range(N):
        graph[i] = []
        d[i,i] = np.inf
        for j in range(max(i-r, 0), min(i+r+1, N)):
            if j == i:
                continue
            graph[i].append(j)
            d[i,j] = np.inf
            count += 1

    while count < num:
        i, j = divmod(np.argmin(d), N)

        if not d[i,j] < thresh:
            break

        graph[i].append(j)
        d[i,j] = np.inf
        count += 1

    return graph
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
def build_frame_graph_v2(poses, disps, intrinsics, num=16, thresh=24.0, r=2):
    """ construct a frame graph between co-visible frames

    Every frame is first linked to its neighbours within `r` indices. Then
    the pair with the smallest remaining distance is added repeatedly, each
    time also ruling out the 3 x 3 block of pairs around it, until no pair
    is closer than `thresh`. `num` is accepted but not read.

    Returns:
        OrderedDict mapping each frame index to its list of neighbours.
    """
    N = poses.shape[1]
    d = compute_distance_matrix_flow2(poses, disps, intrinsics)

    count = 0
    graph = OrderedDict()

    for i in range(N):
        graph[i] = []
        d[i,i] = np.inf
        for j in range(max(i-r, 0), min(i+r+1, N)):
            if j == i:
                continue
            graph[i].append(j)
            d[i,j] = np.inf
            count += 1

    while True:
        i, j = divmod(np.argmin(d), N)

        if not d[i,j] < thresh:
            break

        graph[i].append(j)

        # suppress the chosen pair and its immediate neighbours
        for i1 in range(i-1, i+2):
            for j1 in range(j-1, j+2):
                if 0 <= i1 < N and 0 <= j1 < N:
                    d[i1, j1] = np.inf

        count += 1

    return graph
|
113 |
+
|
thirdparty/DROID-SLAM/droid_slam/geom/losses.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from collections import OrderedDict
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
from lietorch import SO3, SE3, Sim3
|
5 |
+
from .graph_utils import graph_to_edge_list
|
6 |
+
from .projective_ops import projective_transform
|
7 |
+
|
8 |
+
|
9 |
+
def pose_metrics(dE):
    """ Translation/Rotation/Scaling metrics from Sim3

    Returns:
        (r_err, t_err, s_err): rotation angle in degrees, translation norm,
        and absolute deviation of the scale from 1.
    """
    trans, quat, scale = dE.data.split([3, 4, 1], -1)

    # rotation angle in radians, then converted to degrees
    angle = SO3(quat).log().norm(dim=-1)
    r_err = (180 / np.pi) * angle

    t_err = trans.norm(dim=-1)
    s_err = (scale - 1.0).abs()
    return r_err, t_err, s_err
|
19 |
+
|
20 |
+
|
21 |
+
def fit_scale(Ps, Gs):
    """ per-batch least-squares scale s minimizing |t_P - s * t_G|

    Uses the first three components of `.data` of each argument, flattened
    per batch element. The result is detached from the graph.
    """
    batch = Ps.shape[0]
    t_est = Ps.data[...,:3].detach().reshape(batch, -1)
    t_ref = Gs.data[...,:3].detach().reshape(batch, -1)

    numer = (t_est*t_ref).sum(-1)
    denom = (t_ref*t_ref).sum(-1) + 1e-8   # guard against an all-zero reference
    return numer / denom
|
28 |
+
|
29 |
+
|
30 |
+
def geodesic_loss(Ps, Gs, graph, gamma=0.9, do_scale=True):
    """ Loss function for training network

    Compares relative poses along the edges of `graph`: `Ps` against each
    entry of `Gs`. Entry i of the n entries is weighted by
    gamma ** (n - i - 1). The metrics are computed from the last entry of
    `Gs` only.

    Returns:
        (loss, metrics) where metrics holds 'rot_error', 'tr_error',
        'bad_rot' and 'bad_tr'.
    """

    # relative pose
    ii, jj, kk = graph_to_edge_list(graph)
    dP = Ps[:,jj] * Ps[:,ii].inv()

    n = len(Gs)
    total = 0.0

    for i in range(n):
        w = gamma ** (n - i - 1)
        dG = Gs[i][:,jj] * Gs[i][:,ii].inv()

        if do_scale:
            s = fit_scale(dP, dG)
            dG = dG.scale(s[:,None])

        # pose error
        d = (dG * dP.inv()).log()

        if isinstance(dG, SE3):
            tau, phi = d.split([3,3], dim=-1)
            term = tau.norm(dim=-1).mean() + \
                phi.norm(dim=-1).mean()
            total += w * (term)

        elif isinstance(dG, Sim3):
            tau, phi, sig = d.split([3,3,1], dim=-1)
            term = tau.norm(dim=-1).mean() + \
                phi.norm(dim=-1).mean() + \
                0.05 * sig.norm(dim=-1).mean()
            total += w * (term)

    dE = Sim3(dG * dP.inv()).detach()
    r_err, t_err, s_err = pose_metrics(dE)

    # NOTE(review): 'bad_rot' / 'bad_tr' hold the fraction of edges *below*
    # the threshold -- confirm the key names against their consumers.
    metrics = {
        'rot_error': r_err.mean().item(),
        'tr_error': t_err.mean().item(),
        'bad_rot': (r_err < .1).float().mean().item(),
        'bad_tr': (t_err < .01).float().mean().item(),
    }

    return total, metrics
|
75 |
+
|
76 |
+
|
77 |
+
def residual_loss(residuals, gamma=0.9):
    """ loss on system residuals

    Args:
        residuals: sequence of tensors; entry i of n is weighted by
            gamma ** (n - i - 1), so the last entry has weight 1.
        gamma: decay factor of the weights.

    Returns:
        (loss, metrics) where metrics is {'residual': <loss as a float>}.
        An empty sequence gives (0.0, {'residual': 0.0}).
    """
    total = 0.0
    n = len(residuals)

    for i in range(n):
        w = gamma ** (n - i - 1)
        total += w * residuals[i].abs().mean()

    # float() covers both the tensor case and the plain 0.0 left by an empty
    # input, where calling .item() would fail
    return total, {'residual': float(total)}
|
87 |
+
|
88 |
+
|
89 |
+
def flow_loss(Ps, disps, poses_est, disps_est, intrinsics, graph, gamma=0.9):
    """ optical flow loss

    Compares the flow induced by each estimate (poses_est[i], disps_est[i])
    with the flow induced by (Ps, disps) between consecutive frames. Entry i
    of the n estimates is weighted by gamma ** (n - i - 1). The `graph`
    argument is replaced by a chain of adjacent frames and is otherwise not
    read. The metrics come from the last estimate only.

    Returns:
        (loss, metrics) where metrics holds 'f_error' and '1px'.
    """

    N = Ps.shape[1]
    # each frame is linked to its previous and next frame only
    graph = OrderedDict(
        (i, [j for j in (i-1, i+1) if 0 <= j < N]) for i in range(N))

    ii, jj, kk = graph_to_edge_list(graph)
    coords0, val0 = projective_transform(Ps, disps, intrinsics, ii, jj)
    val0 = val0 * (disps[:,ii] > 0).float().unsqueeze(dim=-1)

    n = len(poses_est)
    total = 0.0

    for i in range(n):
        w = gamma ** (n - i - 1)
        coords1, val1 = projective_transform(poses_est[i], disps_est[i], intrinsics, ii, jj)

        # only pixels valid in both projections contribute
        v = (val0 * val1).squeeze(dim=-1)
        epe = v * (coords1 - coords0).norm(dim=-1)
        total += w * epe.mean()

    epe = epe.reshape(-1)[v.reshape(-1) > 0.5]
    metrics = {
        'f_error': epe.mean().item(),
        '1px': (epe<1.0).float().mean().item(),
    }

    return total, metrics
|
thirdparty/DROID-SLAM/droid_slam/geom/projective_ops.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
|
4 |
+
from lietorch import SE3, Sim3
|
5 |
+
|
6 |
+
# Depth threshold: proj() replaces any Z below half this value with 1 before dividing.
MIN_DEPTH = 0.2
|
7 |
+
|
8 |
+
def extract_intrinsics(intrinsics):
    """ split the last axis into separate tensors, each given two extra trailing unit axes """
    expanded = intrinsics[...,None,None,:]
    return expanded.unbind(dim=-1)
|
10 |
+
|
11 |
+
def coords_grid(ht, wd, **kwargs):
    """ return an (ht, wd, 2) float grid whose last axis holds (x, y) pixel coordinates

    Keyword arguments are passed to `Tensor.to` for both axes.
    """
    rows = torch.arange(ht).to(**kwargs).float()
    cols = torch.arange(wd).to(**kwargs).float()
    y, x = torch.meshgrid(rows, cols, indexing='ij')

    return torch.stack([x, y], dim=-1)
|
17 |
+
|
18 |
+
def iproj(disps, intrinsics, jacobian=False):
    """ pinhole camera inverse projection

    Returns:
        (pts, J): pts stacks ((x - cx) / fx, (y - cy) / fy, 1, disp) on a new
        last axis. J is None unless `jacobian` is set, in which case it is
        zero everywhere except a 1 in the last component.
    """
    ht, wd = disps.shape[2:]
    fx, fy, cx, cy = extract_intrinsics(intrinsics)

    rows = torch.arange(ht).to(disps.device).float()
    cols = torch.arange(wd).to(disps.device).float()
    y, x = torch.meshgrid(rows, cols, indexing='ij')

    ones = torch.ones_like(disps)
    X = (x - cx) / fx
    Y = (y - cy) / fy
    pts = torch.stack([X, Y, ones, disps], dim=-1)

    if not jacobian:
        return pts, None

    J = torch.zeros_like(pts)
    J[...,-1] = 1.0
    return pts, J
|
38 |
+
|
39 |
+
def proj(Xs, intrinsics, jacobian=False, return_depth=False):
    """Pinhole camera projection.

    Args:
        Xs: tensor whose last axis holds (X, Y, Z, D).
        intrinsics: tensor whose last axis holds (fx, fy, cx, cy).
        jacobian: when true, also return the 2x4 derivative of (x, y) with
            respect to (X, Y, Z, D).
        return_depth: when true, append D / Z as a third output channel.

    Returns:
        (coords, proj_jac); `proj_jac` is None when `jacobian` is false.  The
        Jacobian always has two rows (x and y), even with `return_depth`.
    """
    fx, fy, cx, cy = extract_intrinsics(intrinsics)
    X, Y, Z, D = Xs.unbind(dim=-1)

    # Depths below half of MIN_DEPTH are replaced by 1 before inversion.
    too_close = Z < 0.5*MIN_DEPTH
    Z = torch.where(too_close, torch.ones_like(Z), Z)
    inv_z = 1.0 / Z

    u = fx * (X * inv_z) + cx
    v = fy * (Y * inv_z) + cy
    channels = [u, v, D*inv_z] if return_depth else [u, v]
    coords = torch.stack(channels, dim=-1)

    if not jacobian:
        return coords, None

    B, N, H, W = inv_z.shape
    zero = torch.zeros_like(inv_z)
    entries = [
        fx*inv_z, zero, -fx*X*inv_z*inv_z, zero,
        zero, fy*inv_z, -fy*Y*inv_z*inv_z, zero,
    ]
    proj_jac = torch.stack(entries, dim=-1).view(B, N, H, W, 2, 4)
    return coords, proj_jac
|
66 |
+
|
67 |
+
def actp(Gij, X0, jacobian=False):
    """Apply the group element(s) `Gij` to the point cloud `X0`.

    Args:
        Gij: an SE3 or Sim3 object indexed as [batch, edge].
        X0: points whose last axis holds (X, Y, Z, d).
        jacobian: when true, also return the derivative of the transformed
            points with respect to the group parameters.

    Returns:
        (X1, Ja).  `Ja` has trailing shape (4, 6) for SE3 and (4, 7) for Sim3,
        and is None when `jacobian` is false.
    """
    X1 = Gij[:,:,None,None] * X0
    if not jacobian:
        return X1, None

    X, Y, Z, d = X1.unbind(dim=-1)
    o = torch.zeros_like(d)
    B, N, H, W = d.shape

    # Columns shared by both groups; the last row is all zeros.
    rows = [
        [d, o, o,  o,  Z, -Y],
        [o, d, o, -Z,  o,  X],
        [o, o, d,  Y, -X,  o],
        [o, o, o,  o,  o,  o],
    ]

    if isinstance(Gij, SE3):
        width = 6
    elif isinstance(Gij, Sim3):
        # Sim3 carries one extra column.
        for row, extra in zip(rows, (X, Y, Z, o)):
            row.append(extra)
        width = 7

    flat = [entry for row in rows for entry in row]
    Ja = torch.stack(flat, dim=-1).view(B, N, H, W, 4, width)
    return X1, Ja
|
95 |
+
|
96 |
+
def projective_transform(poses, depths, intrinsics, ii, jj, jacobian=False, return_depth=False):
    """Map points from frames `ii` into frames `jj`.

    Args:
        poses: group object indexed as [batch, frame].
        depths: per-frame tensor passed to `iproj` as its `disps` argument.
        intrinsics: per-frame (fx, fy, cx, cy).
        ii, jj: index tensors selecting the source and target frame of each edge.
        jacobian: when true, also return (Ji, Jj, Jz).
        return_depth: forwarded to `proj`.

    Returns:
        (x1, valid) or, with `jacobian`, (x1, valid, (Ji, Jj, Jz)).  `valid`
        is a float mask with a trailing singleton axis that is 1 where the
        point's Z exceeds MIN_DEPTH in both the source and the target frame.
    """
    # inverse project (pinhole)
    X0, Jz = iproj(depths[:,ii], intrinsics[:,ii], jacobian=jacobian)

    # relative transform from frame ii to frame jj
    Gij = poses[:,jj] * poses[:,ii].inv()

    # Edges that connect a frame to itself are overwritten with a fixed
    # transform.  The override is created on the device and with the dtype of
    # the tensor it is written into, rather than on a hard-coded "cuda".
    # NOTE(review): the 7 values look like a translation of -0.1 along x with
    # an identity rotation -- confirm against lietorch's SE3 data layout.
    self_edge = torch.as_tensor(
        [-0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
        device=Gij.data.device, dtype=Gij.data.dtype)
    Gij.data[:,ii==jj] = self_edge

    X1, Ja = actp(Gij, X0, jacobian=jacobian)

    # project (pinhole)
    x1, Jp = proj(X1, intrinsics[:,jj], jacobian=jacobian, return_depth=return_depth)

    # exclude points too close to camera
    valid = ((X1[...,2] > MIN_DEPTH) & (X0[...,2] > MIN_DEPTH)).float()
    valid = valid.unsqueeze(-1)

    if not jacobian:
        return x1, valid

    # Ji transforms according to dual adjoint
    Jj = torch.matmul(Jp, Ja)
    Ji = -Gij[:,:,None,None,None].adjT(Jj)

    Jz = Gij[:,:,None,None] * Jz
    Jz = torch.matmul(Jp, Jz.unsqueeze(-1))

    return x1, valid, (Ji, Jj, Jz)
|
126 |
+
|
127 |
+
def induced_flow(poses, disps, intrinsics, ii, jj):
    """Optical flow induced by camera motion.

    Returns (flow, valid): `flow` is the difference between the reprojected
    coordinates of each pixel (frames ii -> jj) and its original pixel
    coordinates; `valid` is the mask returned by `projective_transform`.
    """
    ht, wd = disps.shape[2:]

    # Pixel grid of the source frames, ordered (x, y) on the last axis.
    source_px = coords_grid(ht, wd, device=disps.device)
    target_px, valid = projective_transform(poses, disps, intrinsics, ii, jj, False)

    flow = target_px[...,:2] - source_px
    return flow, valid
|
139 |
+
|
thirdparty/DROID-SLAM/droid_slam/logger.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import torch
|
3 |
+
from torch.utils.tensorboard import SummaryWriter
|
4 |
+
|
5 |
+
|
6 |
+
# Reporting period in steps: Logger.push() prints and writes the metric
# averages once every SUM_FREQ calls, dividing each running sum by SUM_FREQ.
SUM_FREQ = 100
|
7 |
+
|
8 |
+
class Logger:
    """Accumulate training metrics and report them periodically.

    Values passed to `push` are summed per key.  Every SUM_FREQ calls the
    per-key averages are printed together with the current learning rate and
    written to a TensorBoard SummaryWriter under ``runs/<name>``.
    """

    def __init__(self, name, scheduler):
        self.total_steps = 0
        self.running_loss = {}
        self.writer = None  # created lazily, see _get_writer()
        self.name = name
        self.scheduler = scheduler

    def _get_writer(self):
        """Return the SummaryWriter, creating it on first use."""
        if self.writer is None:
            self.writer = SummaryWriter('runs/%s' % self.name)
        return self.writer

    def _current_lr(self):
        """Return the most recent learning rate of the last parameter group.

        Uses the scheduler's `get_last_lr()` when available and falls back to
        `get_lr()` otherwise.  The returned list is only indexed, never
        mutated, because `get_last_lr()` may hand out the scheduler's own list.
        """
        get_last_lr = getattr(self.scheduler, 'get_last_lr', None)
        rates = get_last_lr() if callable(get_last_lr) else self.scheduler.get_lr()
        return rates[-1]

    def _print_training_status(self):
        """Print and log the averaged metrics, then zero the running sums."""
        if self.writer is None:
            self._get_writer()
            # Column header: printed once, when the writer is first created here.
            print([k for k in self.running_loss])

        lr = self._current_lr()
        metrics_data = [self.running_loss[k]/SUM_FREQ for k in self.running_loss.keys()]
        training_str = "[{:6d}, {:10.7f}] ".format(self.total_steps+1, lr)
        metrics_str = ("{:10.4f}, "*len(metrics_data)).format(*metrics_data)

        # print the training status
        print(training_str + metrics_str)

        for key in self.running_loss:
            val = self.running_loss[key] / SUM_FREQ
            self.writer.add_scalar(key, val, self.total_steps)
            self.running_loss[key] = 0.0

    def push(self, metrics):
        """Add one step's metrics (a mapping of name -> number) to the running sums."""
        for key in metrics:
            if key not in self.running_loss:
                self.running_loss[key] = 0.0

            self.running_loss[key] += metrics[key]

        if self.total_steps % SUM_FREQ == SUM_FREQ-1:
            self._print_training_status()
            self.running_loss = {}

        self.total_steps += 1

    def write_dict(self, results):
        """Write each entry of `results` as a scalar at the current step.

        The writer is created on demand, so this is safe to call before the
        first periodic report.
        """
        writer = self._get_writer()
        for key in results:
            writer.add_scalar(key, results[key], self.total_steps)

    def close(self):
        """Close the writer if one was ever created."""
        if self.writer is not None:
            self.writer.close()
|
54 |
+
|
thirdparty/DROID-SLAM/droid_slam/modules/__init__.py
ADDED
File without changes
|
thirdparty/DROID-SLAM/droid_slam/modules/clipping.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
# Magnitude threshold used by GradClip.backward: gradient entries whose
# absolute value exceeds it are replaced by zero.
GRAD_CLIP = .01
|
6 |
+
|
7 |
+
class GradClip(torch.autograd.Function):
    """Identity in the forward pass; filters the gradient in the backward pass.

    Gradient entries that are NaN or whose magnitude exceeds GRAD_CLIP are set
    to zero (they are dropped, not clamped to the threshold).
    """

    @staticmethod
    def forward(ctx, x):
        return x

    @staticmethod
    def backward(ctx, grad_x):
        # NaN never compares greater than the threshold, so it needs its own test.
        rejected = (grad_x.abs() > GRAD_CLIP) | torch.isnan(grad_x)
        return torch.where(rejected, torch.zeros_like(grad_x), grad_x)
|
18 |
+
|
19 |
+
class GradientClip(nn.Module):
    """Module wrapper around GradClip so it can be placed inside a network."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Output equals the input; only the gradient flowing back is filtered.
        return GradClip.apply(x)
|
thirdparty/DROID-SLAM/droid_slam/modules/corr.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
|
4 |
+
import droid_backends
|
5 |
+
|
6 |
+
class CorrSampler(torch.autograd.Function):
    """Autograd wrapper around the `droid_backends` correlation lookup kernels."""

    @staticmethod
    def forward(ctx, volume, coords, radius):
        ctx.save_for_backward(volume, coords)
        ctx.radius = radius
        # The backend returns a one-element sequence.
        (sampled,) = droid_backends.corr_index_forward(volume, coords, radius)
        return sampled

    @staticmethod
    def backward(ctx, grad_output):
        volume, coords = ctx.saved_tensors
        (grad_volume,) = droid_backends.corr_index_backward(
            volume, coords, grad_output.contiguous(), ctx.radius)
        # No gradient is returned for `coords` or `radius`.
        return grad_volume, None, None
|
21 |
+
|
22 |
+
|
23 |
+
class CorrBlock:
    """All-pairs correlation volume with an average-pooled pyramid.

    Level i of `corr_pyramid` has shape
    (batch*num, h1, w1, h2 // 2**i, w2 // 2**i).
    """

    def __init__(self, fmap1, fmap2, num_levels=4, radius=3):
        self.num_levels = num_levels
        self.radius = radius
        self.corr_pyramid = []

        # all pairs correlation
        volume = CorrBlock.corr(fmap1, fmap2)
        batch, num, h1, w1, h2, w2 = volume.shape
        volume = volume.reshape(batch*num*h1*w1, 1, h2, w2)

        for level in range(self.num_levels):
            scale = 2**level
            level_view = volume.view(batch*num, h1, w1, h2//scale, w2//scale)
            self.corr_pyramid.append(level_view)
            # Halve the target-image resolution for the next level.
            volume = F.avg_pool2d(volume, 2, stride=2)

    def __call__(self, coords):
        """Sample every pyramid level around `coords` (batch, num, ht, wd, 2)."""
        batch, num, ht, wd, _ = coords.shape
        coords = coords.permute(0, 1, 4, 2, 3).contiguous()
        coords = coords.view(batch*num, 2, ht, wd)

        sampled = []
        for level in range(self.num_levels):
            # Coordinates are rescaled to the resolution of the level.
            corr = CorrSampler.apply(self.corr_pyramid[level], coords/2**level, self.radius)
            sampled.append(corr.view(batch, num, -1, ht, wd))

        return torch.cat(sampled, dim=2)

    def cat(self, other):
        """Append `other`'s pyramid along the first axis, in place; returns self."""
        for level in range(self.num_levels):
            pair = [self.corr_pyramid[level], other.corr_pyramid[level]]
            self.corr_pyramid[level] = torch.cat(pair, 0)
        return self

    def __getitem__(self, index):
        """Index every level along the first axis, in place; returns self."""
        for level in range(self.num_levels):
            self.corr_pyramid[level] = self.corr_pyramid[level][index]
        return self

    @staticmethod
    def corr(fmap1, fmap2):
        """ all-pairs correlation """
        batch, num, dim, ht, wd = fmap1.shape
        # Both feature maps are scaled by 1/4 before the dot product.
        left = fmap1.reshape(batch*num, dim, ht*wd) / 4.0
        right = fmap2.reshape(batch*num, dim, ht*wd) / 4.0

        volume = torch.matmul(left.transpose(1, 2), right)
        return volume.view(batch, num, ht, wd, ht, wd)
|
72 |
+
|
73 |
+
|
74 |
+
class CorrLayer(torch.autograd.Function):
    """Autograd wrapper around the `droid_backends` altcorr kernels."""

    @staticmethod
    def forward(ctx, fmap1, fmap2, coords, r):
        ctx.r = r
        ctx.save_for_backward(fmap1, fmap2, coords)
        # The backend returns a one-element sequence.
        (corr,) = droid_backends.altcorr_forward(fmap1, fmap2, coords, ctx.r)
        return corr

    @staticmethod
    def backward(ctx, grad_corr):
        fmap1, fmap2, coords = ctx.saved_tensors
        grads = droid_backends.altcorr_backward(
            fmap1, fmap2, coords, grad_corr.contiguous(), ctx.r)
        fmap1_grad, fmap2_grad, coords_grad = grads
        # The radius argument receives no gradient.
        return fmap1_grad, fmap2_grad, coords_grad, None
|
89 |
+
|
90 |
+
|
91 |
+
class AltCorrBlock:
    """Correlation lookup computed from a feature pyramid via CorrLayer.

    `pyramid[i]` holds the features scaled by 1/4 and average-pooled i times,
    stored channels-last with shape (B, N, H // 2**i, W // 2**i, C).
    """

    def __init__(self, fmaps, num_levels=4, radius=3):
        self.num_levels = num_levels
        self.radius = radius

        B, N, C, H, W = fmaps.shape
        fmaps = fmaps.view(B*N, C, H, W) / 4.0

        self.pyramid = []
        for level in range(self.num_levels):
            scale = 2**level
            channels_last = fmaps.permute(0, 2, 3, 1).contiguous()
            self.pyramid.append(channels_last.view(B, N, H//scale, W//scale, C))
            fmaps = F.avg_pool2d(fmaps, 2, stride=2)

    def corr_fn(self, coords, ii, jj):
        """Correlate frames `ii` against every pyramid level of frames `jj`.

        `coords` has six axes (B, N, H, W, S, 2); the per-level results are
        concatenated along axis 2.
        """
        B, N, H, W, S, _ = coords.shape
        coords = coords.permute(0, 1, 4, 2, 3, 5)

        per_level = []
        for level in range(self.num_levels):
            # Source features always come from level 0; only the target
            # features and the coordinates follow the pyramid level.
            src = self.pyramid[0][:, ii]
            dst = self.pyramid[level][:, jj]

            coords_lvl = (coords / 2**level).reshape(B*N, S, H, W, 2).contiguous()
            src = src.reshape((B*N,) + src.shape[2:])
            dst = dst.reshape((B*N,) + dst.shape[2:])

            corr = CorrLayer.apply(src.float(), dst.float(), coords_lvl, self.radius)
            corr = corr.view(B, N, S, -1, H, W).permute(0, 1, 3, 4, 5, 2)
            per_level.append(corr)

        return torch.cat(per_level, dim=2)

    def __call__(self, coords, ii, jj):
        # A 5-axis input gets a singleton axis inserted before the last one,
        # and the matching trailing axis is squeezed from the result.
        added_axis = len(coords.shape) == 5
        if added_axis:
            coords = coords.unsqueeze(dim=-2)

        corr = self.corr_fn(coords, ii, jj)

        if added_axis:
            corr = corr.squeeze(dim=-1)

        return corr.contiguous()
|
140 |
+
|
thirdparty/DROID-SLAM/droid_slam/modules/extractor.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
|
6 |
+
class ResidualBlock(nn.Module):
    """Two 3x3 convolutions with a residual connection.

    Args:
        in_planes: number of input channels.
        planes: number of output channels.
        norm_fn: 'group', 'batch', 'instance' or 'none'.
        stride: stride of the first convolution; when it is not 1 the shortcut
            is a strided 1x1 convolution followed by its own norm layer.
    """

    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
        super().__init__()

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)

        groups = planes // 8
        factories = {
            'group': lambda: nn.GroupNorm(num_groups=groups, num_channels=planes),
            'batch': lambda: nn.BatchNorm2d(planes),
            'instance': lambda: nn.InstanceNorm2d(planes),
            'none': lambda: nn.Sequential(),
        }
        make_norm = factories.get(norm_fn)
        strided = stride != 1

        # Submodules are assigned in a fixed order (norm1, norm2, norm3,
        # downsample) so that parameter names and ordering stay stable.
        if make_norm is not None:
            self.norm1 = make_norm()
            self.norm2 = make_norm()
            if strided:
                self.norm3 = make_norm()

        if strided:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)
        else:
            self.downsample = None

    def forward(self, x):
        out = self.relu(self.norm1(self.conv1(x)))
        out = self.relu(self.norm2(self.conv2(out)))

        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(shortcut + out)
|
56 |
+
|
57 |
+
|
58 |
+
class BottleneckBlock(nn.Module):
    """1x1 -> 3x3 -> 1x1 bottleneck with a residual connection.

    The two inner convolutions work on planes // 4 channels.  `norm_fn` is one
    of 'group', 'batch', 'instance' or 'none'.  When `stride` is not 1 the
    shortcut is a strided 1x1 convolution followed by its own norm layer.
    """

    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
        super().__init__()

        inner = planes // 4
        self.conv1 = nn.Conv2d(in_planes, inner, kernel_size=1, padding=0)
        self.conv2 = nn.Conv2d(inner, inner, kernel_size=3, padding=1, stride=stride)
        self.conv3 = nn.Conv2d(inner, planes, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)

        groups = planes // 8
        factories = {
            'group': lambda ch: nn.GroupNorm(num_groups=groups, num_channels=ch),
            'batch': lambda ch: nn.BatchNorm2d(ch),
            'instance': lambda ch: nn.InstanceNorm2d(ch),
            'none': lambda ch: nn.Sequential(),
        }
        make_norm = factories.get(norm_fn)
        strided = stride != 1

        # Assignment order is kept fixed (norm1..norm4, then downsample) so
        # that parameter names and ordering stay stable.
        if make_norm is not None:
            self.norm1 = make_norm(inner)
            self.norm2 = make_norm(inner)
            self.norm3 = make_norm(planes)
            if strided:
                self.norm4 = make_norm(planes)

        if strided:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)
        else:
            self.downsample = None

    def forward(self, x):
        out = self.relu(self.norm1(self.conv1(x)))
        out = self.relu(self.norm2(self.conv2(out)))
        out = self.relu(self.norm3(self.conv3(out)))

        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(shortcut + out)
|
114 |
+
|
115 |
+
|
116 |
+
# Base channel width of BasicEncoder: the stem convolution outputs DIM
# channels and the three residual stages use DIM, 2*DIM and 4*DIM.
DIM=32
|
117 |
+
|
118 |
+
class BasicEncoder(nn.Module):
    """Convolutional feature encoder.

    A stride-2 7x7 stem is followed by three residual stages (strides 1, 2, 2)
    and a 1x1 output convolution, so the output resolution is 1/8 of the input.

    Args:
        output_dim: number of output channels.
        norm_fn: 'group', 'batch', 'instance' or 'none'.
        dropout: when > 0 an `nn.Dropout2d` is stored in `self.dropout`;
            `forward` as written here does not apply it.
        multidim: when true, additional layers (layer4-7, up1, up2, conv3) are
            constructed; `forward` as written here does not use them.
    """

    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, multidim=False):
        super().__init__()
        self.norm_fn = norm_fn
        self.multidim = multidim

        stem_norms = {
            'group': lambda: nn.GroupNorm(num_groups=8, num_channels=DIM),
            'batch': lambda: nn.BatchNorm2d(DIM),
            'instance': lambda: nn.InstanceNorm2d(DIM),
            'none': lambda: nn.Sequential(),
        }
        make_stem_norm = stem_norms.get(self.norm_fn)
        if make_stem_norm is not None:
            self.norm1 = make_stem_norm()

        self.conv1 = nn.Conv2d(3, DIM, kernel_size=7, stride=2, padding=3)
        self.relu1 = nn.ReLU(inplace=True)

        # _make_layer reads and updates self.in_planes, so stage order matters.
        self.in_planes = DIM
        self.layer1 = self._make_layer(DIM, stride=1)
        self.layer2 = self._make_layer(2*DIM, stride=2)
        self.layer3 = self._make_layer(4*DIM, stride=2)

        # output convolution
        self.conv2 = nn.Conv2d(4*DIM, output_dim, kernel_size=1)

        if self.multidim:
            self.layer4 = self._make_layer(256, stride=2)
            self.layer5 = self._make_layer(512, stride=2)

            self.in_planes = 256
            self.layer6 = self._make_layer(256, stride=1)

            self.in_planes = 128
            self.layer7 = self._make_layer(128, stride=1)

            self.up1 = nn.Conv2d(512, 256, 1)
            self.up2 = nn.Conv2d(256, 128, 1)
            self.conv3 = nn.Conv2d(128, output_dim, kernel_size=1)

        self.dropout = nn.Dropout2d(p=dropout) if dropout > 0 else None

        # Weight initialisation: Kaiming-normal for convolutions, and
        # weight=1 / bias=0 for norm layers that have those parameters.
        norm_types = (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                continue
            if not isinstance(module, norm_types):
                continue
            if module.weight is not None:
                nn.init.constant_(module.weight, 1)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def _make_layer(self, dim, stride=1):
        """Build a stage of two ResidualBlocks and set self.in_planes to `dim`."""
        first = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
        second = ResidualBlock(dim, dim, self.norm_fn, stride=1)

        self.in_planes = dim
        return nn.Sequential(first, second)

    def forward(self, x):
        """Encode a (b, n, c, h, w) tensor into (b, n, c2, h2, w2) features."""
        b, n, c1, h1, w1 = x.shape
        # Fold the second axis into the batch for the 2-D convolutions.
        x = x.view(b*n, c1, h1, w1)

        x = self.relu1(self.norm1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)

        x = self.conv2(x)

        _, c2, h2, w2 = x.shape
        return x.view(b, n, c2, h2, w2)
|
thirdparty/DROID-SLAM/droid_slam/modules/gru.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
|
4 |
+
|
5 |
+
class ConvGRU(nn.Module):
    """Convolutional GRU cell with an added global-context term.

    Args:
        h_planes: channels of the hidden state.
        i_planes: total channels of the concatenated inputs.
    """

    def __init__(self, h_planes=128, i_planes=128):
        super().__init__()
        self.do_checkpoint = False

        joint = h_planes + i_planes
        self.convz = nn.Conv2d(joint, h_planes, 3, padding=1)
        self.convr = nn.Conv2d(joint, h_planes, 3, padding=1)
        self.convq = nn.Conv2d(joint, h_planes, 3, padding=1)

        self.w = nn.Conv2d(h_planes, h_planes, 1, padding=0)

        self.convz_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
        self.convr_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
        self.convq_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)

    def forward(self, net, *inputs):
        """Return the updated hidden state for hidden state `net` and the given inputs."""
        inp = torch.cat(inputs, dim=1)
        b, c, h, w = net.shape

        # Global context: a gated copy of the hidden state averaged over all
        # spatial positions, giving one vector per sample.
        gated = torch.sigmoid(self.w(net)) * net
        glo = gated.view(b, c, h*w).mean(-1).view(b, c, 1, 1)

        net_inp = torch.cat([net, inp], dim=1)
        z = torch.sigmoid(self.convz(net_inp) + self.convz_glo(glo))
        r = torch.sigmoid(self.convr(net_inp) + self.convr_glo(glo))

        reset_inp = torch.cat([r*net, inp], dim=1)
        q = torch.tanh(self.convq(reset_inp) + self.convq_glo(glo))

        return (1-z) * net + z * q
|
33 |
+
|
34 |
+
|
thirdparty/DROID-SLAM/droid_slam/motion_filter.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import torch
|
3 |
+
import lietorch
|
4 |
+
|
5 |
+
from collections import OrderedDict
|
6 |
+
from droid_net import DroidNet
|
7 |
+
|
8 |
+
import geom.projective_ops as pops
|
9 |
+
from modules.corr import CorrBlock
|
10 |
+
|
11 |
+
|
12 |
+
class MotionFilter:
    """ This class is used to filter incoming frames and extract features """

    def __init__(self, net, video, thresh=2.5, device="cuda:0"):
        # split net modules
        self.cnet = net.cnet
        self.fnet = net.fnet
        self.update = net.update

        self.video = video
        self.thresh = thresh
        self.device = device

        # Frames seen since the last one that was added to the video.
        self.count = 0

        # mean, std for image normalization
        self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None]
        self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None]

    @torch.cuda.amp.autocast(enabled=True)
    def __context_encoder(self, image):
        """ context features """
        # The context network output is split into two 128-channel halves
        # along axis 2; both have their leading axis squeezed.
        hidden, context = self.cnet(image).split([128, 128], dim=2)
        return hidden.tanh().squeeze(0), context.relu().squeeze(0)

    @torch.cuda.amp.autocast(enabled=True)
    def __feature_encoder(self, image):
        """ features for correlation volume """
        return self.fnet(image).squeeze(0)

    @torch.cuda.amp.autocast(enabled=True)
    @torch.no_grad()
    def track(self, tstamp, image, depth=None, intrinsics=None, mask=None):
        """ main update operation - run on every frame in video """

        # normalize images (the channel axis is reversed before scaling to [0, 1])
        inputs = image[None, :, [2,1,0]].to(self.device) / 255.0
        inputs = inputs.sub_(self.MEAN).div_(self.STDV)

        # extract features
        gmap = self.__feature_encoder(inputs)
        if mask is None:
            # Default to an all-zero mask at feature resolution.  The mask is
            # only forwarded to the video; it is not applied to gmap here.
            mask = torch.zeros([gmap.shape[-2], gmap.shape[-1]]).to(gmap)

        ### always add first frame to the depth video ###
        if self.video.counter.value == 0:
            identity = lietorch.SE3.Identity(1,).data.squeeze()
            net, inp = self.__context_encoder(inputs[:,[0]])
            self.net, self.inp, self.fmap = net, inp, gmap
            # NOTE(review): the first frame passes net[0,0] / inp[0,0] while
            # later keyframes pass net[0] / inp[0] -- confirm this is intended.
            self.video.append(tstamp, image[0], identity, 1.0, depth, intrinsics / 8.0, gmap, net[0,0], inp[0,0], mask)
            return

        ### only add new frame if there is enough motion ###
        ht = image.shape[-2] // 8
        wd = image.shape[-1] // 8

        # index correlation volume between the last keyframe and this frame
        coords0 = pops.coords_grid(ht, wd, device=self.device)[None,None]
        corr = CorrBlock(self.fmap[None,[0]], gmap[None,[0]])(coords0)

        # approximate flow magnitude using 1 update iteration
        _, delta, weight = self.update(self.net[None], self.inp[None], corr)

        # check motion magnitude / add new frame to video
        enough_motion = delta.norm(dim=-1).mean().item() > self.thresh
        if not enough_motion:
            self.count += 1
            return

        self.count = 0
        net, inp = self.__context_encoder(inputs[:,[0]])
        self.net, self.inp, self.fmap = net, inp, gmap
        self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, gmap, net[0], inp[0], mask)
|
thirdparty/DROID-SLAM/droid_slam/trajectory_filler.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import torch
|
3 |
+
import lietorch
|
4 |
+
|
5 |
+
from lietorch import SE3
|
6 |
+
from collections import OrderedDict
|
7 |
+
from factor_graph import FactorGraph
|
8 |
+
from droid_net import DroidNet
|
9 |
+
import geom.projective_ops as pops
|
10 |
+
|
11 |
+
|
12 |
+
class PoseTrajectoryFiller:
    """ This class is used to fill in non-keyframe poses """

    def __init__(self, net, video, device="cuda:0"):
        # split net modules
        self.cnet = net.cnet
        self.fnet = net.fnet
        self.update = net.update

        self.count = 0
        self.video = video
        self.device = device

        # mean, std for image normalization
        self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None]
        self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None]

    @torch.cuda.amp.autocast(enabled=True)
    def __feature_encoder(self, image):
        """ features for correlation volume """
        return self.fnet(image)

    def __fill(self, tstamps, images, intrinsics):
        """ fill operator """

        tt = torch.as_tensor(tstamps, device="cuda")
        images = torch.stack(images, 0)
        intrinsics = torch.stack(intrinsics, 0)
        inputs = images[:,:,[2,1,0]].to(self.device) / 255.0

        ### linear pose interpolation ###
        num_key = self.video.counter.value   # number of keyframes
        num_new = len(tstamps)               # frames to fill in this call

        key_ts = self.video.tstamp[:num_key]          # tstamp of keyframes
        key_poses = SE3(self.video.poses[:num_key])   # pose of keyframes

        # For each frame: index of the last keyframe at or before it, and of
        # the keyframe after that (clamped to the final keyframe).
        t0 = torch.as_tensor([key_ts[key_ts<=t].shape[0] - 1 for t in tstamps])
        t1 = torch.where(t0<num_key-1, t0+1, t0)

        # The small constant keeps the division finite when t0 == t1.
        dt = key_ts[t1] - key_ts[t0] + 1e-3
        dP = key_poses[t1] * key_poses[t0].inv()

        velocity = dP.log() / dt.unsqueeze(-1)
        step = velocity * (tt - key_ts[t0]).unsqueeze(-1)
        Gs = SE3.exp(step) * key_poses[t0]

        # extract features (no need for context features)
        inputs = inputs.sub_(self.MEAN).div_(self.STDV)
        fmap = self.__feature_encoder(inputs)

        # Temporarily append the new frames after the keyframes; the counter
        # is restored before returning.
        lo, hi = num_key, num_key + num_new
        self.video.counter.value += num_new
        self.video[lo:hi] = (tt, images[:,0], Gs.data, 1, None, intrinsics / 8.0, fmap)

        # Connect every new frame to its two neighbouring keyframes.
        graph = FactorGraph(self.video, self.update)
        for key_idx in (t0, t1):
            graph.add_factors(key_idx.cuda(), torch.arange(lo, hi).cuda())

        for _ in range(6):
            graph.update(lo, hi, motion_only=True)

        Gs = SE3(self.video.poses[lo:hi].clone())
        self.video.counter.value -= num_new

        return [ Gs ]

    @torch.no_grad()
    def __call__(self, image_stream):
        """ fill in poses of non-keyframe images """

        # store all camera poses
        pose_list = []

        tstamps, images, intrinsics = [], [], []

        for (tstamp, image, intrinsic) in image_stream:
            tstamps.append(tstamp)
            images.append(image)
            intrinsics.append(intrinsic)

            # Frames are processed in chunks of 16.
            if len(tstamps) == 16:
                pose_list.extend(self.__fill(tstamps, images, intrinsics))
                tstamps, images, intrinsics = [], [], []

        # Remaining frames that did not fill a whole chunk.
        if tstamps:
            pose_list.extend(self.__fill(tstamps, images, intrinsics))

        # stitch pose segments together
        return lietorch.cat(pose_list, 0)
|
112 |
+
|
thirdparty/DROID-SLAM/droid_slam/vis_headless.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import cv2
|
3 |
+
import lietorch
|
4 |
+
import droid_backends
|
5 |
+
import time
|
6 |
+
import argparse
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
# import os
|
10 |
+
# os.environ['PYOPENGL_PLATFORM'] = 'egl'
|
11 |
+
#os.environ['PYOPENGL_PLATFORM'] = 'osmesa'
|
12 |
+
import open3d as o3d
|
13 |
+
|
14 |
+
# o3d.visualization.webrtc_server.enable_webrtc()
|
15 |
+
|
16 |
+
from lietorch import SE3
|
17 |
+
import geom.projective_ops as pops
|
18 |
+
|
19 |
+
|
20 |
+
# Vertices of the camera wireframe drawn by create_camera_actor(): vertex 0 is
# at the origin and vertices 1-7 lie in the plane z = 1.5.
CAM_POINTS = np.array([
    [ 0.0,  0.0, 0.0],
    [-1.0, -1.0, 1.5],
    [ 1.0, -1.0, 1.5],
    [ 1.0,  1.0, 1.5],
    [-1.0,  1.0, 1.5],
    [-0.5,  1.0, 1.5],
    [ 0.5,  1.0, 1.5],
    [ 0.0,  1.2, 1.5],
])

# Index pairs into CAM_POINTS, one per line segment of the wireframe.
CAM_LINES = np.array([
    [1, 2], [2, 3], [3, 4], [4, 1],
    [1, 0], [0, 2], [3, 0], [0, 4],
    [5, 7], [7, 6],
])
|
32 |
+
|
33 |
+
def white_balance(img):
    """ gray-world white balance of a BGR image, computed in LAB space

    Each chroma channel (a, b) is shifted by its mean offset from 128,
    weighted per pixel by the L channel, then the image is converted back
    to BGR and returned.
    """
    # from https://stackoverflow.com/questions/46390779/automatic-white-balancing-with-grayworld-assumption
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)

    # the L channel is never modified, so the per-pixel weight is shared
    lightness = lab[:, :, 0] / 255.0
    for channel in (1, 2):
        offset = np.average(lab[:, :, channel]) - 128
        lab[:, :, channel] = lab[:, :, channel] - (offset * lightness * 1.1)

    return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
|
42 |
+
|
43 |
+
|
44 |
+
def create_camera_actor(g, scale=0.05):
    """ build open3d camera polydata

    Returns an Open3D LineSet made of CAM_POINTS (multiplied by `scale`) and
    CAM_LINES, painted uniformly with (g, 0.5 * (1 - g), 0.9 * (1 - g)).
    """
    vertices = o3d.utility.Vector3dVector(scale * CAM_POINTS)
    segments = o3d.utility.Vector2iVector(CAM_LINES)
    wireframe = o3d.geometry.LineSet(points=vertices, lines=segments)

    complement = 1 - g
    wireframe.paint_uniform_color((g * 1.0, 0.5 * complement, 0.9 * complement))
    return wireframe
|
53 |
+
|
54 |
+
|
55 |
+
def create_point_actor(points, colors):
    """ open3d point cloud from numpy array

    `points` and `colors` are each converted with Vector3dVector and stored
    as the positions and colors of a new PointCloud, which is returned.
    """
    cloud = o3d.geometry.PointCloud()
    xyz = o3d.utility.Vector3dVector(points)
    cloud.points = xyz
    rgb = o3d.utility.Vector3dVector(colors)
    cloud.colors = rgb
    return cloud
|
61 |
+
|
62 |
+
|
63 |
+
def droid_visualization(video, save_path, device="cuda:0"):
    """ DROID visualization frontend (variant that also saves results to disk)

    Runs an Open3D visualizer whose animation callback rebuilds the camera
    and point-cloud geometry of every frame flagged in `video.dirty` and then
    writes the merged geometry to `{save_path}/points.ply` and
    `{save_path}/camera.ply`.

    Args:
        video: shared buffer; this function reads its `poses`, `disps`,
            `images`, `intrinsics`, `dirty` and `counter` fields and takes
            `get_lock()` while touching the dirty flags.
        save_path: directory that receives the two .ply files.
        device: CUDA device made current before any GPU work.
    """

    # honor the caller's device instead of always selecting device 0
    torch.cuda.set_device(device)
    droid_visualization.video = video
    droid_visualization.cameras = {}
    droid_visualization.points = {}
    droid_visualization.warmup = 8
    droid_visualization.scale = 1.0
    droid_visualization.ix = 0
    print("headless droid_visualization")


    droid_visualization.filter_thresh = 0.3 #0.005

    def increase_filter(vis):
        """ double the depth-filter threshold and flag every frame for redraw """
        droid_visualization.filter_thresh *= 2
        with droid_visualization.video.get_lock():
            droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True

    def decrease_filter(vis):
        """ halve the depth-filter threshold and flag every frame for redraw """
        droid_visualization.filter_thresh *= 0.5
        with droid_visualization.video.get_lock():
            droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True

    def animation_callback(vis):
        """ rebuild geometry for dirty frames, then save points and cameras """
        cam = vis.get_view_control().convert_to_pinhole_camera_parameters()

        with torch.no_grad():

            with video.get_lock():
                dirty_index, = torch.where(video.dirty.clone())

            if len(dirty_index) == 0:
                return

            video.dirty[dirty_index] = False

            # convert poses to 4x4 matrix (invert once, reuse for back-projection)
            poses = torch.index_select(video.poses, 0, dirty_index)
            disps = torch.index_select(video.disps, 0, dirty_index)
            poses_inv = SE3(poses).inv()
            Ps = poses_inv.matrix().cpu().numpy()

            images = torch.index_select(video.images, 0, dirty_index)
            # reorder channels [2,1,0], subsample every 8th pixel, scale to [0, 1]
            images = images.cpu()[:,[2,1,0],3::8,3::8].permute(0,2,3,1) / 255.0
            points = droid_backends.iproj(poses_inv.data, disps, video.intrinsics[0]).cpu()

            thresh = droid_visualization.filter_thresh * torch.ones_like(disps.mean(dim=[1,2]))

            count = droid_backends.depth_filter(
                video.poses, video.disps, video.intrinsics[0], dirty_index, thresh)

            count = count.cpu()
            disps = disps.cpu()
            # keep pixels with a filter count of at least 2 and a disparity
            # above half of the frame's mean disparity
            masks = ((count >= 2) & (disps > .5*disps.mean(dim=[1,2], keepdim=True)))

            for i in range(len(dirty_index)):
                pose = Ps[i]
                ix = dirty_index[i].item()

                # drop stale geometry for this frame before re-adding it
                if ix in droid_visualization.cameras:
                    vis.remove_geometry(droid_visualization.cameras[ix])
                    del droid_visualization.cameras[ix]

                if ix in droid_visualization.points:
                    vis.remove_geometry(droid_visualization.points[ix])
                    del droid_visualization.points[ix]

                ### add camera actor ###
                cam_actor = create_camera_actor(True)
                cam_actor.transform(pose)
                vis.add_geometry(cam_actor)
                droid_visualization.cameras[ix] = cam_actor

                # points, images and masks already live on the CPU
                mask = masks[i].reshape(-1)
                pts = points[i].reshape(-1, 3)[mask].numpy()
                clr = images[i].reshape(-1, 3)[mask].numpy()

                ## add point actor ###
                point_actor = create_point_actor(pts, clr)
                vis.add_geometry(point_actor)
                droid_visualization.points[ix] = point_actor

            ### Hack to save Point Cloud Data and Camera results ###

            # Save points
            pcd_points = o3d.geometry.PointCloud()
            for frame_points in droid_visualization.points.values():
                pcd_points += frame_points
            o3d.io.write_point_cloud(f"{save_path}/points.ply", pcd_points, write_ascii=False)

            # Save pose
            # NOTE(review): the accumulator starts from an untransformed camera
            # actor, so camera.ply always contains one extra camera at the
            # origin - confirm this is intended.
            pcd_camera = create_camera_actor(True)
            for frame_camera in droid_visualization.cameras.values():
                pcd_camera += frame_camera

            o3d.io.write_line_set(f"{save_path}/camera.ply", pcd_camera, write_ascii=False)

            ### end ###

            # hack to allow interacting with visualization during inference
            if len(droid_visualization.cameras) >= droid_visualization.warmup:
                cam = vis.get_view_control().convert_from_pinhole_camera_parameters(cam)

            droid_visualization.ix += 1
            vis.poll_events()
            vis.update_renderer()

    ### create Open3D visualization ###
    vis = o3d.visualization.VisualizerWithKeyCallback()
    vis.register_animation_callback(animation_callback)
    vis.register_key_callback(ord("S"), increase_filter)
    vis.register_key_callback(ord("A"), decrease_filter)

    vis.create_window(height=540, width=960)
    # vis.create_window(height=512, width=384)
    vis.get_render_option().load_from_json("thirdparty/DROID-SLAM//misc/renderoption.json")

    vis.run()
    vis.destroy_window()
|
thirdparty/DROID-SLAM/droid_slam/visualization.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import cv2
|
3 |
+
import lietorch
|
4 |
+
import droid_backends
|
5 |
+
import time
|
6 |
+
import argparse
|
7 |
+
import numpy as np
|
8 |
+
import open3d as o3d
|
9 |
+
|
10 |
+
from lietorch import SE3
|
11 |
+
import geom.projective_ops as pops
|
12 |
+
|
13 |
+
# Vertices of the camera wireframe: vertex 0 is the apex at the origin,
# vertices 1-4 are the corners of the rectangle at z = 1.5 and vertices 5-7
# form a small marker attached to the y = 1 edge.
CAM_POINTS = np.array([
    [ 0.0,  0.0, 0.0],
    [-1.0, -1.0, 1.5],
    [ 1.0, -1.0, 1.5],
    [ 1.0,  1.0, 1.5],
    [-1.0,  1.0, 1.5],
    [-0.5,  1.0, 1.5],
    [ 0.5,  1.0, 1.5],
    [ 0.0,  1.2, 1.5],
])

# Pairs of indices into CAM_POINTS, one pair per wireframe segment.
CAM_LINES = np.array([
    [1, 2], [2, 3], [3, 4], [4, 1],   # rectangle at z = 1.5
    [1, 0], [0, 2], [3, 0], [0, 4],   # edges joining the rectangle to the apex
    [5, 7], [7, 6],                   # marker
])
|
25 |
+
|
26 |
+
def white_balance(img):
    """ gray-world white balance of a BGR image, computed in LAB space

    Each chroma channel (a, b) is shifted by its mean offset from 128,
    weighted per pixel by the L channel, then the image is converted back
    to BGR and returned.
    """
    # from https://stackoverflow.com/questions/46390779/automatic-white-balancing-with-grayworld-assumption
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)

    # the L channel is never modified, so the per-pixel weight is shared
    lightness = lab[:, :, 0] / 255.0
    for channel in (1, 2):
        offset = np.average(lab[:, :, channel]) - 128
        lab[:, :, channel] = lab[:, :, channel] - (offset * lightness * 1.1)

    return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
|
35 |
+
|
36 |
+
def create_camera_actor(g, scale=0.05):
    """ build open3d camera polydata

    Returns an Open3D LineSet made of CAM_POINTS (multiplied by `scale`) and
    CAM_LINES, painted uniformly with (g, 0.5 * (1 - g), 0.9 * (1 - g)).
    """
    vertices = o3d.utility.Vector3dVector(scale * CAM_POINTS)
    segments = o3d.utility.Vector2iVector(CAM_LINES)
    wireframe = o3d.geometry.LineSet(points=vertices, lines=segments)

    complement = 1 - g
    wireframe.paint_uniform_color((g * 1.0, 0.5 * complement, 0.9 * complement))
    return wireframe
|
45 |
+
|
46 |
+
def create_point_actor(points, colors):
    """ open3d point cloud from numpy array

    `points` and `colors` are each converted with Vector3dVector and stored
    as the positions and colors of a new PointCloud, which is returned.
    """
    cloud = o3d.geometry.PointCloud()
    xyz = o3d.utility.Vector3dVector(points)
    cloud.points = xyz
    rgb = o3d.utility.Vector3dVector(colors)
    cloud.colors = rgb
    return cloud
|
52 |
+
|
53 |
+
def droid_visualization(video, device="cuda:0"):
    """ DROID visualization frontend

    Runs an Open3D visualizer whose animation callback rebuilds the camera
    and point-cloud geometry of every frame flagged in `video.dirty`.
    Keys "S" / "A" double / halve the depth-filter threshold.

    Args:
        video: shared buffer; this function reads its `poses`, `disps`,
            `images`, `intrinsics`, `dirty` and `counter` fields and takes
            `get_lock()` while touching the dirty flags.
        device: CUDA device made current before any GPU work.
    """

    torch.cuda.set_device(device)
    droid_visualization.video = video
    droid_visualization.cameras = {}
    droid_visualization.points = {}
    droid_visualization.warmup = 8
    droid_visualization.scale = 1.0
    droid_visualization.ix = 0

    droid_visualization.filter_thresh = 0.005

    def increase_filter(vis):
        """ double the depth-filter threshold and flag every frame for redraw """
        droid_visualization.filter_thresh *= 2
        with droid_visualization.video.get_lock():
            droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True

    def decrease_filter(vis):
        """ halve the depth-filter threshold and flag every frame for redraw """
        droid_visualization.filter_thresh *= 0.5
        with droid_visualization.video.get_lock():
            droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True

    #file dialog based pointcloud export added#
    # NOTE(review): this callback is not registered with any key below, so it
    # is currently unreachable - confirm which key it should be bound to.
    def export_pointcloud(vis):
        """ open a save dialog and write the merged point cloud to the chosen file """
        # `gui` was never imported at module level; import it here so the
        # callback does not fail with a NameError when invoked
        import open3d.visualization.gui as gui

        gui.Application.instance.initialize()
        window = gui.Application.instance.create_window("Export", 350, 600)

        def _on_filedlg_cancel():
            window.close_dialog()
            window.close()
            gui.Application.instance.quit()

        def _on_filedlg_done(path):
            pcd_export(path)
            window.close_dialog()
            gui.Application.instance.quit()

        def exec_file_dialog():
            filedlg = gui.FileDialog(gui.FileDialog.SAVE, "Select file", window.theme)

            filedlg.add_filter(".ply .xyz .pcd", "PointCloud (.xyz .ply .pcd)")
            filedlg.add_filter("", "All files")
            filedlg.set_on_cancel(_on_filedlg_cancel)
            filedlg.set_on_done(_on_filedlg_done)
            window.show_dialog(filedlg)

        def pcd_export(path):
            print("\nExporting pointcloud as", path)
            final_pcd = o3d.geometry.PointCloud()
            for frame_points in droid_visualization.points.values():
                final_pcd += frame_points

            o3d.io.write_point_cloud(path, final_pcd, write_ascii=False)
            #vis.capture_depth_point_cloud("/home/bertuser/droidslam_export.ply")

        exec_file_dialog()

    def animation_callback(vis):
        """ rebuild camera and point geometry for every dirty frame """
        cam = vis.get_view_control().convert_to_pinhole_camera_parameters()

        with torch.no_grad():

            with video.get_lock():
                dirty_index, = torch.where(video.dirty.clone())

            if len(dirty_index) == 0:
                return

            video.dirty[dirty_index] = False

            # convert poses to 4x4 matrix (invert once, reuse for back-projection)
            poses = torch.index_select(video.poses, 0, dirty_index)
            disps = torch.index_select(video.disps, 0, dirty_index)
            poses_inv = SE3(poses).inv()
            Ps = poses_inv.matrix().cpu().numpy()

            images = torch.index_select(video.images, 0, dirty_index)
            # reorder channels [2,1,0], subsample every 8th pixel, scale to [0, 1]
            images = images.cpu()[:,[2,1,0],3::8,3::8].permute(0,2,3,1) / 255.0
            points = droid_backends.iproj(poses_inv.data, disps, video.intrinsics[0]).cpu()

            thresh = droid_visualization.filter_thresh * torch.ones_like(disps.mean(dim=[1,2]))

            count = droid_backends.depth_filter(
                video.poses, video.disps, video.intrinsics[0], dirty_index, thresh)

            count = count.cpu()
            disps = disps.cpu()
            # keep pixels with a filter count of at least 2 and a disparity
            # above half of the frame's mean disparity
            masks = ((count >= 2) & (disps > .5*disps.mean(dim=[1,2], keepdim=True)))

            for i in range(len(dirty_index)):
                pose = Ps[i]
                ix = dirty_index[i].item()

                # drop stale geometry for this frame before re-adding it
                if ix in droid_visualization.cameras:
                    vis.remove_geometry(droid_visualization.cameras[ix])
                    del droid_visualization.cameras[ix]

                if ix in droid_visualization.points:
                    vis.remove_geometry(droid_visualization.points[ix])
                    del droid_visualization.points[ix]

                ### add camera actor ###
                cam_actor = create_camera_actor(True)
                cam_actor.transform(pose)
                vis.add_geometry(cam_actor)
                droid_visualization.cameras[ix] = cam_actor

                # points, images and masks already live on the CPU
                mask = masks[i].reshape(-1)
                pts = points[i].reshape(-1, 3)[mask].numpy()
                clr = images[i].reshape(-1, 3)[mask].numpy()

                ## add point actor ###
                point_actor = create_point_actor(pts, clr)
                vis.add_geometry(point_actor)
                droid_visualization.points[ix] = point_actor

            # hack to allow interacting with visualization during inference
            if len(droid_visualization.cameras) >= droid_visualization.warmup:
                cam = vis.get_view_control().convert_from_pinhole_camera_parameters(cam)

            droid_visualization.ix += 1
            vis.poll_events()
            vis.update_renderer()

    ### create Open3D visualization ###
    vis = o3d.visualization.VisualizerWithKeyCallback()
    vis.register_animation_callback(animation_callback)
    vis.register_key_callback(ord("S"), increase_filter)
    vis.register_key_callback(ord("A"), decrease_filter)

    vis.create_window(height=540, width=960)
    vis.get_render_option().load_from_json("misc/renderoption.json")

    vis.run()
    vis.destroy_window()
|
thirdparty/DROID-SLAM/environment.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: droidenv
|
2 |
+
channels:
|
3 |
+
- rusty1s
|
4 |
+
- pytorch
|
5 |
+
- open3d-admin
|
6 |
+
- nvidia
|
7 |
+
- conda-forge
|
8 |
+
- defaults
|
9 |
+
dependencies:
|
10 |
+
- pytorch-scatter
|
11 |
+
- torchaudio
|
12 |
+
- torchvision
|
13 |
+
- open3d
|
14 |
+
- pytorch=1.10
|
15 |
+
- cudatoolkit=11.3
|
16 |
+
- tensorboard
|
17 |
+
- scipy
|
18 |
+
- opencv
|
19 |
+
- tqdm
|
20 |
+
- suitesparse
|
21 |
+
- matplotlib
|
22 |
+
- pyyaml
|
thirdparty/DROID-SLAM/environment_novis.yaml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: droidenv
|
2 |
+
channels:
|
3 |
+
- rusty1s
|
4 |
+
- pytorch
|
5 |
+
- nvidia
|
6 |
+
- conda-forge
|
7 |
+
- defaults
|
8 |
+
dependencies:
|
9 |
+
- pytorch-scatter
|
10 |
+
- torchaudio
|
11 |
+
- torchvision
|
12 |
+
- pytorch=1.10
|
13 |
+
- cudatoolkit=11.3
|
14 |
+
- tensorboard
|
15 |
+
- scipy
|
16 |
+
- opencv
|
17 |
+
- tqdm
|
18 |
+
- suitesparse
|
19 |
+
- matplotlib
|
20 |
+
- pyyaml
|
thirdparty/DROID-SLAM/evaluation_scripts/test_eth3d.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('droid_slam')
|
3 |
+
|
4 |
+
from tqdm import tqdm
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
import lietorch
|
8 |
+
import cv2
|
9 |
+
import os
|
10 |
+
import glob
|
11 |
+
import time
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
import torch.nn.functional as F
|
15 |
+
from droid import Droid
|
16 |
+
|
17 |
+
import matplotlib.pyplot as plt
|
18 |
+
|
19 |
+
|
20 |
+
def show_image(image):
    """ display a 3-axis image tensor in the OpenCV window 'image'

    The first axis is moved last and values are divided by 255 before display.
    """
    frame = image.permute(1, 2, 0).cpu().numpy()
    scaled = frame / 255.0
    cv2.imshow('image', scaled)
    cv2.waitKey(1)
|
24 |
+
|
25 |
+
def image_stream(datapath, use_depth=False, stride=1):
    """ image generator

    Reads `calibration.txt`, `rgb/*.png` and `depth/*.png` under `datapath`
    and yields, for every `stride`-th frame, `(t, image[None], intrinsics)`
    or, when `use_depth` is set, `(t, image[None], depth, intrinsics)`.
    Frames are resized to roughly 384 * 512 pixels, cropped to multiples
    of 8, and the intrinsics are rescaled to match the resize.
    """

    calib_file = os.path.join(datapath, 'calibration.txt')
    fx, fy, cx, cy = np.loadtxt(calib_file).tolist()

    def _sorted_pngs(subdir):
        # every stride-th png of a sub-folder, in sorted order
        return sorted(glob.glob(os.path.join(datapath, subdir, '*.png')))[::stride]

    image_list = _sorted_pngs('rgb')
    depth_list = _sorted_pngs('depth')

    for t, (image_file, depth_file) in enumerate(zip(image_list, depth_list)):
        image = cv2.imread(image_file)
        # raw depth values are divided by 5000
        depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH) / 5000.0

        h0, w0, _ = image.shape
        h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
        w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))

        # crop sizes: largest multiples of 8 that fit the resized frame
        crop_h = h1 - h1 % 8
        crop_w = w1 - w1 % 8

        image = cv2.resize(image, (w1, h1))[:crop_h, :crop_w]
        image = torch.as_tensor(image).permute(2, 0, 1)

        depth = F.interpolate(torch.as_tensor(depth)[None, None], (h1, w1)).squeeze()
        depth = depth[:crop_h, :crop_w]

        intrinsics = torch.as_tensor([fx, fy, cx, cy])
        intrinsics[0::2] *= (w1 / w0)
        intrinsics[1::2] *= (h1 / h0)

        if use_depth:
            yield t, image[None], depth, intrinsics
        else:
            yield t, image[None], intrinsics
|
57 |
+
|
58 |
+
if __name__ == '__main__':
    # Evaluation script: track an RGB-D sequence under --datapath, then
    # compare the estimated trajectory against groundtruth.txt with evo.
    parser = argparse.ArgumentParser()
    parser.add_argument("--datapath")
    parser.add_argument("--weights", default="droid.pth")
    parser.add_argument("--buffer", type=int, default=1024)
    parser.add_argument("--image_size", default=[240, 320])
    parser.add_argument("--disable_vis", action="store_true")

    parser.add_argument("--beta", type=float, default=0.5)
    parser.add_argument("--filter_thresh", type=float, default=2.0)
    parser.add_argument("--warmup", type=int, default=8)
    parser.add_argument("--keyframe_thresh", type=float, default=3.5)
    parser.add_argument("--frontend_thresh", type=float, default=16.0)
    parser.add_argument("--frontend_window", type=int, default=16)
    parser.add_argument("--frontend_radius", type=int, default=1)
    parser.add_argument("--frontend_nms", type=int, default=0)

    parser.add_argument("--stereo", action="store_true")
    parser.add_argument("--depth", action="store_true")

    parser.add_argument("--backend_thresh", type=float, default=22.0)
    parser.add_argument("--backend_radius", type=int, default=2)
    parser.add_argument("--backend_nms", type=int, default=3)
    args = parser.parse_args()

    torch.multiprocessing.set_start_method('spawn')

    print("Running evaluation on {}".format(args.datapath))
    print(args)

    # this can usually be set to 2-3 except for "camera_shake" scenes
    # set to 2 for test scenes
    stride = 1

    for (t, image, depth, intrinsics) in tqdm(image_stream(args.datapath, use_depth=True, stride=stride)):
        if not args.disable_vis:
            show_image(image[0])

        # the tracker is created lazily, once the actual frame size is known
        if t == 0:
            args.image_size = [image.shape[2], image.shape[3]]
            droid = Droid(args)

        droid.track(t, image, depth, intrinsics=intrinsics)

    traj_est = droid.terminate(image_stream(args.datapath, use_depth=False, stride=stride))

    ### run evaluation ###

    print("#"*20 + " Results...")

    import evo
    from evo.core.trajectory import PoseTrajectory3D
    from evo.tools import file_interface
    from evo.core import sync
    import evo.main_ape as main_ape
    from evo.core.metrics import PoseRelation

    image_path = os.path.join(args.datapath, 'rgb')
    images_list = sorted(glob.glob(os.path.join(image_path, '*.png')))[::stride]
    # the file stem is the timestamp; os.path keeps this independent of the
    # platform's path separator
    tstamps = [float(os.path.splitext(os.path.basename(x))[0]) for x in images_list]

    traj_est = PoseTrajectory3D(
        positions_xyz=traj_est[:,:3],
        orientations_quat_wxyz=traj_est[:,3:],
        timestamps=np.array(tstamps))

    gt_file = os.path.join(args.datapath, 'groundtruth.txt')
    traj_ref = file_interface.read_tum_trajectory_file(gt_file)

    traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)

    result = main_ape.ape(traj_ref, traj_est, est_name='traj',
        pose_relation=PoseRelation.translation_part, align=True, correct_scale=False)

    print(result.stats)
|
134 |
+
|
thirdparty/DROID-SLAM/evaluation_scripts/test_euroc.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('droid_slam')
|
3 |
+
|
4 |
+
from tqdm import tqdm
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
import lietorch
|
8 |
+
import cv2
|
9 |
+
import os
|
10 |
+
import glob
|
11 |
+
import time
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
from torch.multiprocessing import Process
|
15 |
+
from droid import Droid
|
16 |
+
|
17 |
+
import torch.nn.functional as F
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
def show_image(image):
    """ display a 3-axis image tensor in the OpenCV window 'image'

    The first axis is moved last and values are divided by 255 before display.
    """
    frame = image.permute(1, 2, 0).cpu().numpy()
    scaled = frame / 255.0
    cv2.imshow('image', scaled)
    cv2.waitKey(1)
|
25 |
+
|
26 |
+
def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1):
    """ image generator

    Rectifies the images found under `mav0/cam0/data` (and, when `stereo` is
    set, the matching `cam1` images) with the hard-coded calibration below,
    resizes them to `image_size` and yields `(stride * t, images, intrinsics)`.
    Images and intrinsics are placed on the GPU. In stereo mode a frame whose
    right image is missing is skipped.
    """

    K_l = np.array([458.654, 0.0, 367.215, 0.0, 457.296, 248.375, 0.0, 0.0, 1.0]).reshape(3,3)
    d_l = np.array([-0.28340811, 0.07395907, 0.00019359, 1.76187114e-05, 0.0])
    R_l = np.array([
         0.999966347530033, -0.001422739138722922, 0.008079580483432283,
         0.001365741834644127, 0.9999741760894847, 0.007055629199258132,
        -0.008089410156878961, -0.007044357138835809, 0.9999424675829176
    ]).reshape(3,3)

    P_l = np.array([435.2046959714599, 0, 367.4517211914062, 0, 0, 435.2046959714599, 252.2008514404297, 0, 0, 0, 1, 0]).reshape(3,4)
    map_l = cv2.initUndistortRectifyMap(K_l, d_l, R_l, P_l[:3,:3], (752, 480), cv2.CV_32F)

    K_r = np.array([457.587, 0.0, 379.999, 0.0, 456.134, 255.238, 0.0, 0.0, 1]).reshape(3,3)
    d_r = np.array([-0.28368365, 0.07451284, -0.00010473, -3.555907e-05, 0.0]).reshape(5)
    R_r = np.array([
         0.9999633526194376, -0.003625811871560086, 0.007755443660172947,
         0.003680398547259526, 0.9999684752771629, -0.007035845251224894,
        -0.007729688520722713, 0.007064130529506649, 0.999945173484644
    ]).reshape(3,3)

    P_r = np.array([435.2046959714599, 0, 367.4517211914062, -47.90639384423901, 0, 435.2046959714599, 252.2008514404297, 0, 0, 0, 1, 0]).reshape(3,4)
    map_r = cv2.initUndistortRectifyMap(K_r, d_r, R_r, P_r[:3,:3], (752, 480), cv2.CV_32F)

    # fx, fy, cx, cy of the rectified images at the size in ht0, wd0
    intrinsics_vec = [435.2046959714599, 435.2046959714599, 367.4517211914062, 252.2008514404297]
    ht0, wd0 = [480, 752]

    # read all png images in folder
    images_left = sorted(glob.glob(os.path.join(datapath, 'mav0/cam0/data/*.png')))[::stride]
    images_right = [x.replace('cam0', 'cam1') for x in images_left]

    for t, (imgL, imgR) in enumerate(zip(images_left, images_right)):
        if stereo and not os.path.isfile(imgR):
            continue
        # (the timestamp formerly parsed from the file name here was never used)
        images = [cv2.remap(cv2.imread(imgL), map_l[0], map_l[1], interpolation=cv2.INTER_LINEAR)]
        if stereo:
            images += [cv2.remap(cv2.imread(imgR), map_r[0], map_r[1], interpolation=cv2.INTER_LINEAR)]

        images = torch.from_numpy(np.stack(images, 0))
        images = images.permute(0, 3, 1, 2).to("cuda:0", dtype=torch.float32)
        images = F.interpolate(images, image_size, mode="bilinear", align_corners=False)

        # rescale the intrinsics from (ht0, wd0) to image_size
        intrinsics = torch.as_tensor(intrinsics_vec).cuda()
        intrinsics[0] *= image_size[1] / wd0
        intrinsics[1] *= image_size[0] / ht0
        intrinsics[2] *= image_size[1] / wd0
        intrinsics[3] *= image_size[0] / ht0

        yield stride*t, images, intrinsics
|
77 |
+
|
78 |
+
|
79 |
+
if __name__ == '__main__':
    # Evaluation script: track the sequence under --datapath, then compare
    # the estimated trajectory against the --gt file with evo.
    parser = argparse.ArgumentParser()
    parser.add_argument("--datapath", help="path to euroc sequence")
    parser.add_argument("--gt", help="path to gt file")
    parser.add_argument("--weights", default="droid.pth")
    parser.add_argument("--buffer", type=int, default=512)
    parser.add_argument("--image_size", default=[320,512])
    parser.add_argument("--disable_vis", action="store_true")
    parser.add_argument("--stereo", action="store_true")

    parser.add_argument("--beta", type=float, default=0.3)
    parser.add_argument("--filter_thresh", type=float, default=2.4)
    parser.add_argument("--warmup", type=int, default=15)
    parser.add_argument("--keyframe_thresh", type=float, default=3.5)
    parser.add_argument("--frontend_thresh", type=float, default=17.5)
    parser.add_argument("--frontend_window", type=int, default=20)
    parser.add_argument("--frontend_radius", type=int, default=2)
    parser.add_argument("--frontend_nms", type=int, default=1)

    parser.add_argument("--backend_thresh", type=float, default=24.0)
    parser.add_argument("--backend_radius", type=int, default=2)
    parser.add_argument("--backend_nms", type=int, default=2)
    args = parser.parse_args()

    torch.multiprocessing.set_start_method('spawn')

    print("Running evaluation on {}".format(args.datapath))
    print(args)

    droid = Droid(args)
    time.sleep(5)

    # tracking uses every second frame; terminate() receives the full stream
    for (t, image, intrinsics) in tqdm(image_stream(args.datapath, stereo=args.stereo, stride=2)):
        droid.track(t, image, intrinsics=intrinsics)

    traj_est = droid.terminate(image_stream(args.datapath, stride=1))

    ### run evaluation ###

    import evo
    from evo.core.trajectory import PoseTrajectory3D
    from evo.tools import file_interface
    from evo.core import sync
    import evo.main_ape as main_ape
    from evo.core.metrics import PoseRelation

    images_list = sorted(glob.glob(os.path.join(args.datapath, 'mav0/cam0/data/*.png')))
    # the file stem is the timestamp; os.path keeps this independent of the
    # platform's path separator
    tstamps = [float(os.path.splitext(os.path.basename(x))[0]) for x in images_list]

    traj_est = PoseTrajectory3D(
        positions_xyz=1.10 * traj_est[:,:3],
        orientations_quat_wxyz=traj_est[:,3:],
        timestamps=np.array(tstamps))

    traj_ref = file_interface.read_tum_trajectory_file(args.gt)

    traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)

    result = main_ape.ape(traj_ref, traj_est, est_name='traj',
        pose_relation=PoseRelation.translation_part, align=True, correct_scale=True)

    print(result)
|
141 |
+
|
142 |
+
|
thirdparty/DROID-SLAM/evaluation_scripts/test_tum.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('droid_slam')
|
3 |
+
|
4 |
+
from tqdm import tqdm
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
import lietorch
|
8 |
+
import cv2
|
9 |
+
import os
|
10 |
+
import glob
|
11 |
+
import time
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
import torch.nn.functional as F
|
15 |
+
from droid import Droid
|
16 |
+
|
17 |
+
|
18 |
+
def show_image(image):
    """ display a 3-axis image tensor in the OpenCV window 'image'

    The first axis is moved last and values are divided by 255 before display.
    """
    frame = image.permute(1, 2, 0).cpu().numpy()
    scaled = frame / 255.0
    cv2.imshow('image', scaled)
    cv2.waitKey(1)
|
22 |
+
|
23 |
+
def image_stream(datapath, image_size=[320, 512]):
    """ image generator

    Yields `(t, image[None], intrinsics)` for every second png under
    `datapath/rgb`. Each frame is undistorted with the hard-coded
    calibration, resized to 352 x 256 and cropped by 16 / 8 pixels per side;
    the intrinsics are rescaled and shifted to match. `image_size` is not
    used by the body.
    """

    fx, fy, cx, cy = 517.3, 516.5, 318.6, 255.3

    camera_matrix = np.array([fx, 0.0, cx, 0.0, fy, cy, 0.0, 0.0, 1.0]).reshape(3,3)
    dist_coeffs = np.array([0.2624, -0.9531, -0.0054, 0.0026, 1.1633])

    # read all png images in folder
    pattern = os.path.join(datapath, 'rgb', '*.png')
    images_list = sorted(glob.glob(pattern))[::2]

    for t, imfile in enumerate(images_list):
        image = cv2.imread(imfile)
        ht0, wd0, _ = image.shape
        image = cv2.undistort(image, camera_matrix, dist_coeffs)
        image = cv2.resize(image, (320+32, 240+16))
        image = torch.from_numpy(image).permute(2,0,1)

        # rescale from the 640 x 480 reference size to the resized frame
        scale_x = image.shape[2] / 640.0
        scale_y = image.shape[1] / 480.0

        intrinsics = torch.as_tensor([fx, fy, cx, cy]).cuda()
        intrinsics[0] *= scale_x
        intrinsics[1] *= scale_y
        intrinsics[2] *= scale_x
        intrinsics[3] *= scale_y

        # crop image to remove distortion boundary
        intrinsics[2] -= 16
        intrinsics[3] -= 8
        image = image[:, 8:-8, 16:-16]

        yield t, image[None], intrinsics
|
53 |
+
|
54 |
+
if __name__ == '__main__':
    # Evaluation script: track the sequence under --datapath, then compare
    # the estimated trajectory against groundtruth.txt with evo.
    parser = argparse.ArgumentParser()
    parser.add_argument("--datapath")
    parser.add_argument("--weights", default="droid.pth")
    parser.add_argument("--buffer", type=int, default=512)
    parser.add_argument("--image_size", default=[240, 320])
    parser.add_argument("--disable_vis", action="store_true")

    parser.add_argument("--beta", type=float, default=0.6)
    parser.add_argument("--filter_thresh", type=float, default=1.75)
    parser.add_argument("--warmup", type=int, default=12)
    parser.add_argument("--keyframe_thresh", type=float, default=2.25)
    parser.add_argument("--frontend_thresh", type=float, default=12.0)
    parser.add_argument("--frontend_window", type=int, default=25)
    parser.add_argument("--frontend_radius", type=int, default=2)
    parser.add_argument("--frontend_nms", type=int, default=1)

    parser.add_argument("--backend_thresh", type=float, default=15.0)
    parser.add_argument("--backend_radius", type=int, default=2)
    parser.add_argument("--backend_nms", type=int, default=3)
    args = parser.parse_args()

    args.stereo = False
    torch.multiprocessing.set_start_method('spawn')

    print("Running evaluation on {}".format(args.datapath))
    print(args)

    droid = Droid(args)
    time.sleep(5)

    for (t, image, intrinsics) in tqdm(image_stream(args.datapath)):
        if not args.disable_vis:
            # image_stream yields a batch of one frame; show_image permutes
            # exactly three axes, so pass the single frame
            show_image(image[0])
        droid.track(t, image, intrinsics=intrinsics)


    traj_est = droid.terminate(image_stream(args.datapath))

    ### run evaluation ###

    print("#"*20 + " Results...")

    import evo
    from evo.core.trajectory import PoseTrajectory3D
    from evo.tools import file_interface
    from evo.core import sync
    import evo.main_ape as main_ape
    from evo.core.metrics import PoseRelation

    image_path = os.path.join(args.datapath, 'rgb')
    images_list = sorted(glob.glob(os.path.join(image_path, '*.png')))[::2]
    # the file stem is the timestamp; os.path keeps this independent of the
    # platform's path separator
    tstamps = [float(os.path.splitext(os.path.basename(x))[0]) for x in images_list]

    traj_est = PoseTrajectory3D(
        positions_xyz=traj_est[:,:3],
        orientations_quat_wxyz=traj_est[:,3:],
        timestamps=np.array(tstamps))

    gt_file = os.path.join(args.datapath, 'groundtruth.txt')
    traj_ref = file_interface.read_tum_trajectory_file(gt_file)

    traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
    result = main_ape.ape(traj_ref, traj_est, est_name='traj',
        pose_relation=PoseRelation.translation_part, align=True, correct_scale=True)


    print(result)
|
123 |
+
|
thirdparty/DROID-SLAM/evaluation_scripts/validate_tartanair.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('droid_slam')
|
3 |
+
sys.path.append('thirdparty/tartanair_tools')
|
4 |
+
|
5 |
+
from tqdm import tqdm
|
6 |
+
import numpy as np
|
7 |
+
import torch
|
8 |
+
import lietorch
|
9 |
+
import cv2
|
10 |
+
import os
|
11 |
+
import glob
|
12 |
+
import time
|
13 |
+
import yaml
|
14 |
+
import argparse
|
15 |
+
|
16 |
+
from droid import Droid
|
17 |
+
|
18 |
+
def image_stream(datapath, image_size=[384, 512], intrinsics_vec=[320.0, 320.0, 320.0, 240.0], stereo=False):
    """ Load all frames of a sequence directory, resized, with rescaled intrinsics.

    Args:
        datapath: directory containing `image_left/*.png` (and `image_right/*.png`
            when `stereo` is set).
        image_size: [height, width] every image is resized to.
        intrinsics_vec: camera intrinsics at the original 480x640 resolution.
            Assumed order is (fx, fy, cx, cy) -- TODO confirm against the tracker.
        stereo: when True the right image is stacked after the left one.

    Returns:
        A list of (frame_index, images, intrinsics) tuples, where `images` is a
        uint8 tensor of shape (num_views, 3, height, width) and `intrinsics` is
        `intrinsics_vec` rescaled to `image_size`.
    """

    # resolution that intrinsics_vec refers to
    ht0, wd0 = [480, 640]
    ht1, wd1 = image_size

    # read all png images in folder
    images_left = sorted(glob.glob(os.path.join(datapath, 'image_left/*.png')))
    images_right = sorted(glob.glob(os.path.join(datapath, 'image_right/*.png')))

    # Horizontal terms scale with the width ratio, vertical terms with the height
    # ratio. For the default 384x512 target both ratios are 0.8.
    sx, sy = wd1 / wd0, ht1 / ht0
    scale = torch.as_tensor([sx, sy, sx, sy])

    data = []
    for t in range(len(images_left)):
        # cv2.resize takes its target size as (width, height)
        images = [ cv2.resize(cv2.imread(images_left[t]), (wd1, ht1)) ]
        if stereo:
            images += [ cv2.resize(cv2.imread(images_right[t]), (wd1, ht1)) ]

        # (views, H, W, C) -> (views, C, H, W)
        images = torch.from_numpy(np.stack(images, 0)).permute(0,3,1,2)
        intrinsics = scale * torch.as_tensor(intrinsics_vec)

        data.append((t, images, intrinsics))

    return data
|
38 |
+
|
39 |
+
|
40 |
+
if __name__ == '__main__':
    # Command-line options: dataset root, weights file, tracker settings and
    # evaluation switches.
    parser = argparse.ArgumentParser()
    parser.add_argument("--datapath", default="datasets/TartanAir")
    parser.add_argument("--weights", default="droid.pth")
    parser.add_argument("--buffer", type=int, default=1000)
    parser.add_argument("--image_size", default=[384,512])
    parser.add_argument("--stereo", action="store_true")
    parser.add_argument("--disable_vis", action="store_true")
    parser.add_argument("--plot_curve", action="store_true")
    parser.add_argument("--id", type=int, default=-1)

    parser.add_argument("--beta", type=float, default=0.3)
    parser.add_argument("--filter_thresh", type=float, default=2.4)
    parser.add_argument("--warmup", type=int, default=12)
    parser.add_argument("--keyframe_thresh", type=float, default=3.5)
    parser.add_argument("--frontend_thresh", type=float, default=15)
    parser.add_argument("--frontend_window", type=int, default=20)
    parser.add_argument("--frontend_radius", type=int, default=1)
    parser.add_argument("--frontend_nms", type=int, default=1)

    parser.add_argument("--backend_thresh", type=float, default=20.0)
    parser.add_argument("--backend_radius", type=int, default=2)
    parser.add_argument("--backend_nms", type=int, default=3)

    args = parser.parse_args()
    torch.multiprocessing.set_start_method('spawn')

    from data_readers.tartan import test_split
    from evaluation.tartanair_evaluator import TartanAirEvaluator

    # exist_ok avoids the check-then-create race of isdir() followed by mkdir()
    os.makedirs("figures", exist_ok=True)

    # a non-negative --id restricts the run to that single entry of the test split
    if args.id >= 0:
        test_split = [ test_split[args.id] ]

    ate_list = []
    for scene in test_split:
        print("Performing evaluation on {}".format(scene))
        torch.cuda.empty_cache()
        droid = Droid(args)

        scenedir = os.path.join(args.datapath, scene)

        for (tstamp, image, intrinsics) in tqdm(image_stream(scenedir, stereo=args.stereo)):
            droid.track(tstamp, image, intrinsics=intrinsics)

        # fill in non-keyframe poses + global BA
        traj_est = droid.terminate(image_stream(scenedir))

        ### do evaluation ###
        evaluator = TartanAirEvaluator()
        gt_file = os.path.join(scenedir, "pose_left.txt")
        traj_ref = np.loadtxt(gt_file, delimiter=' ')[:, [1, 2, 0, 4, 5, 3, 6]] # ned -> xyz

        # usually stereo should not be scale corrected, but we are comparing monocular and stereo here
        results = evaluator.evaluate_one_trajectory(
            traj_ref, traj_est, scale=True, title=scenedir[-20:].replace('/', '_'))

        print(results)
        ate_list.append(results["ate_score"])

    print("Results")
    print(ate_list)

    if args.plot_curve:
        import matplotlib.pyplot as plt
        ate = np.array(ate_list)
        xs = np.linspace(0.0, 1.0, 512)
        # fraction of runs whose ATE is below each threshold in xs
        ys = [np.count_nonzero(ate < t) / ate.shape[0] for t in xs]

        plt.plot(xs, ys)
        plt.xlabel("ATE [m]")
        plt.ylabel("% runs")
        plt.show()
|
115 |
+
|
thirdparty/DROID-SLAM/misc/DROID.png
ADDED
![]() |
Git LFS Details
|
thirdparty/DROID-SLAM/misc/renderoption.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"background_color" : [ 1, 1, 1 ],
|
3 |
+
"class_name" : "RenderOption",
|
4 |
+
"default_mesh_color" : [ 0.69999999999999996, 0.69999999999999996, 0.69999999999999996 ],
|
5 |
+
"image_max_depth" : 3000,
|
6 |
+
"image_stretch_option" : 0,
|
7 |
+
"interpolation_option" : 0,
|
8 |
+
"light0_color" : [ 1, 1, 1 ],
|
9 |
+
"light0_diffuse_power" : 20,
|
10 |
+
"light0_position" : [ 0, 0, 20 ],
|
11 |
+
"light0_specular_power" : 2.20000000000000001,
|
12 |
+
"light0_specular_shininess" : 100,
|
13 |
+
"light1_color" : [ 1, 1, 1 ],
|
14 |
+
"light1_diffuse_power" : 0.66000000000000003,
|
15 |
+
"light1_position" : [ 0, 0, 2 ],
|
16 |
+
"light1_specular_power" : 2.20000000000000001,
|
17 |
+
"light1_specular_shininess" : 100,
|
18 |
+
"light2_color" : [ 1, 1, 1 ],
|
19 |
+
"light2_diffuse_power" : 20,
|
20 |
+
"light2_position" : [ 0, 0, -20 ],
|
21 |
+
"light2_specular_power" : 2.20000000000000001,
|
22 |
+
"light2_specular_shininess" : 100,
|
23 |
+
"light3_color" : [ 1, 1, 1 ],
|
24 |
+
"light3_diffuse_power" : 20,
|
25 |
+
"light3_position" : [ 0, 0, -20 ],
|
26 |
+
"light3_specular_power" : 2.20000000000000001,
|
27 |
+
"light3_specular_shininess" : 100,
|
28 |
+
"light_ambient_color" : [ 0, 0, 0 ],
|
29 |
+
"light_on" : true,
|
30 |
+
"mesh_color_option" : 1,
|
31 |
+
"mesh_shade_option" : 0,
|
32 |
+
"mesh_show_back_face" : false,
|
33 |
+
"mesh_show_wireframe" : false,
|
34 |
+
"point_color_option" : 7,
|
35 |
+
"point_show_normal" : false,
|
36 |
+
"point_size" : 2,
|
37 |
+
"show_coordinate_frame" : false,
|
38 |
+
"version_major" : 1,
|
39 |
+
"version_minor" : 0
|
40 |
+
}
|
thirdparty/DROID-SLAM/misc/screenshot.png
ADDED
![]() |
Git LFS Details
|
thirdparty/DROID-SLAM/setup.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from setuptools import setup
|
2 |
+
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
3 |
+
|
4 |
+
import os.path as osp
|
5 |
+
ROOT = osp.dirname(osp.abspath(__file__))
|
6 |
+
|
7 |
+
# Compute capabilities targeted by both extensions.
_CUDA_ARCHS = ('60', '61', '70', '75', '80', '86')


def _compile_args(opt_flag):
    """ Build the per-compiler flag dict: `opt_flag` for both, plus one -gencode entry per arch for nvcc. """
    gencode = ['-gencode=arch=compute_{0},code=sm_{0}'.format(arch) for arch in _CUDA_ARCHS]
    return {'cxx': [opt_flag], 'nvcc': [opt_flag] + gencode}


_EIGEN_INCLUDE = osp.join(ROOT, 'thirdparty/eigen')

# DROID-SLAM CUDA kernels
setup(
    name='droid_backends',
    ext_modules=[
        CUDAExtension('droid_backends',
            include_dirs=[_EIGEN_INCLUDE],
            sources=[
                'src/droid.cpp',
                'src/droid_kernels.cu',
                'src/correlation_kernels.cu',
                'src/altcorr_kernel.cu',
            ],
            extra_compile_args=_compile_args('-O3')),
    ],
    cmdclass={ 'build_ext' : BuildExtension }
)

# lietorch package and its compiled backend
setup(
    name='lietorch',
    version='0.2',
    description='Lie Groups for PyTorch',
    packages=['lietorch'],
    package_dir={'': 'thirdparty/lietorch'},
    ext_modules=[
        CUDAExtension('lietorch_backends',
            include_dirs=[
                osp.join(ROOT, 'thirdparty/lietorch/lietorch/include'),
                _EIGEN_INCLUDE],
            sources=[
                'thirdparty/lietorch/lietorch/src/lietorch.cpp',
                'thirdparty/lietorch/lietorch/src/lietorch_gpu.cu',
                'thirdparty/lietorch/lietorch/src/lietorch_cpu.cpp'],
            extra_compile_args=_compile_args('-O2')),
    ],
    cmdclass={ 'build_ext' : BuildExtension }
)
|
thirdparty/DROID-SLAM/src/altcorr_kernel.cu
ADDED
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include <torch/extension.h>
|
2 |
+
#include <cuda.h>
|
3 |
+
#include <cuda_runtime.h>
|
4 |
+
#include <vector>
|
5 |
+
#include <cuda_fp16.h>
|
6 |
+
#include <cuda_runtime.h>
|
7 |
+
|
8 |
+
|
9 |
+
#include <ATen/ATen.h>
|
10 |
+
#include <ATen/NativeFunctions.h>
|
11 |
+
#include <ATen/cuda/CUDAApplyUtils.cuh>
|
12 |
+
#include <ATen/native/cuda/KernelUtils.cuh>
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
// Thread-block tile: BLOCK_H x BLOCK_W pixels, processed CHANNEL_STRIDE channels at a time.
#define BLOCK_H 4
#define BLOCK_W 8
// Parenthesised so the product stays intact inside larger expressions
// (e.g. `x / BLOCK_HW` would otherwise expand to `x / BLOCK_H * BLOCK_W`).
#define BLOCK_HW (BLOCK_H * BLOCK_W)
#define CHANNEL_STRIDE 32
|
20 |
+
|
21 |
+
|
22 |
+
// True when (h, w) is a valid index into an H x W grid.
__forceinline__ __device__
bool within_bounds(int h, int w, int H, int W) {
  const bool row_ok = (0 <= h) && (h < H);
  const bool col_ok = (0 <= w) && (w < W);
  return row_ok && col_ok;
}
|
26 |
+
|
27 |
+
// Correlation of fmap1 with bilinearly sampled neighbourhoods of fmap2.
//
// fmap1 / fmap2 are indexed [b][h][w][c]; coords is indexed [b][n][h][w][2],
// with element 0 used as the x (column) coordinate and element 1 as the y (row)
// coordinate into fmap2. Results are accumulated into corr[b][n][k][h][w],
// where k = iy + (2r+1)*ix enumerates the (2r+1)^2 window offsets.
//
// Each block handles one batch entry and a BLOCK_H x BLOCK_W tile of pixels and
// must be launched with blockDim = (BLOCK_H, BLOCK_W): the shared arrays are
// indexed by tid. Channels are processed in chunks of CHANNEL_STRIDE, so the
// channel count has to be a multiple of CHANNEL_STRIDE.
//
// The raw-pointer writes into corr use a channel stride of H1*W1, i.e. they
// assume corr is contiguous with the same spatial size as fmap1.
template <typename scalar_t>
__global__ void altcorr_forward_kernel(
    const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1,
    const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2,
    const torch::PackedTensorAccessor32<float,5,torch::RestrictPtrTraits> coords,
    torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr,
    int r)
{
  const int b = blockIdx.x;
  const int h0 = blockIdx.y * blockDim.x;
  const int w0 = blockIdx.z * blockDim.y;
  const int tid = threadIdx.x * blockDim.y + threadIdx.y;

  const int H1 = fmap1.size(1);
  const int W1 = fmap1.size(2);
  const int H2 = fmap2.size(1);
  const int W2 = fmap2.size(2);
  const int N = coords.size(1);
  const int C = fmap1.size(3);

  __shared__ scalar_t f1[CHANNEL_STRIDE][BLOCK_HW];
  __shared__ scalar_t f2[CHANNEL_STRIDE][BLOCK_HW];

  __shared__ float x2s[BLOCK_HW];
  __shared__ float y2s[BLOCK_HW];

  // pixel owned by this thread
  const int h1 = h0 + threadIdx.x;
  const int w1 = w0 + threadIdx.y;
  const bool inside = within_bounds(h1, w1, H1, W1);

  const int rd = 2*r + 1;

  for (int c=0; c<C; c+=CHANNEL_STRIDE) {
    // stage one channel chunk of the fmap1 tile: each thread loads one channel
    // for every pixel of the tile
    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
      const int k1 = k + tid / CHANNEL_STRIDE;
      const int ph = h0 + k1 / BLOCK_W;
      const int pw = w0 + k1 % BLOCK_W;
      const int c1 = tid % CHANNEL_STRIDE;

      if (within_bounds(ph, pw, H1, W1))
        f1[c1][k1] = fmap1[b][ph][pw][c+c1];
      else
        f1[c1][k1] = static_cast<scalar_t>(0.0);
    }

    __syncthreads();

    for (int n=0; n<N; n++) {
      // publish this thread's sampling position; threads outside the image
      // store zeros so no uninitialised shared memory is read below
      if (inside) {
        x2s[tid] = coords[b][n][h1][w1][0];
        y2s[tid] = coords[b][n][h1][w1][1];
      } else {
        x2s[tid] = 0.0f;
        y2s[tid] = 0.0f;
      }

      // x2s / y2s are read by other threads when they gather fmap2
      __syncthreads();

      // fractional part of the sampling position (bilinear weights)
      const float dx = x2s[tid] - floor(x2s[tid]);
      const float dy = y2s[tid] - floor(y2s[tid]);

      // one extra sample per axis so every output cell gets all four bilinear taps
      for (int iy=0; iy<rd+1; iy++) {
        for (int ix=0; ix<rd+1; ix++) {
          // gather the fmap2 features at integer offset (iy, ix) for every pixel
          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
            const int k1 = k + tid / CHANNEL_STRIDE;
            const int h2 = static_cast<int>(floor(y2s[k1])) - r + iy;
            const int w2 = static_cast<int>(floor(x2s[k1])) - r + ix;
            const int c2 = tid % CHANNEL_STRIDE;

            if (within_bounds(h2, w2, H2, W2))
              f2[c2][k1] = fmap2[b][h2][w2][c+c2];
            else
              f2[c2][k1] = static_cast<scalar_t>(0.0);
          }

          __syncthreads();

          // partial dot product over this channel chunk
          scalar_t s = 0.0;
          for (int k=0; k<CHANNEL_STRIDE; k++)
            s += f1[k][tid] * f2[k][tid];

          // every thread must be done with f2 (and x2s / y2s) before the next
          // iteration overwrites them
          __syncthreads();

          // offsets of the four output cells that receive this sample
          const int ix_nw = H1*W1*((iy-1) + rd*(ix-1));
          const int ix_ne = H1*W1*((iy-1) + rd*ix);
          const int ix_sw = H1*W1*(iy + rd*(ix-1));
          const int ix_se = H1*W1*(iy + rd*ix);

          const scalar_t nw = s * static_cast<scalar_t>((dy) * (dx));
          const scalar_t ne = s * static_cast<scalar_t>((dy) * (1-dx));
          const scalar_t sw = s * static_cast<scalar_t>((1-dy) * (dx));
          const scalar_t se = s * static_cast<scalar_t>((1-dy) * (1-dx));

          if (inside) {
            scalar_t* corr_ptr = &corr[b][n][0][h1][w1];

            if (iy > 0 && ix > 0)
              *(corr_ptr + ix_nw) += nw;

            if (iy > 0 && ix < rd)
              *(corr_ptr + ix_ne) += ne;

            if (iy < rd && ix > 0)
              *(corr_ptr + ix_sw) += sw;

            if (iy < rd && ix < rd)
              *(corr_ptr + ix_se) += se;
          }
        }
      }
    }
  }
}
|
150 |
+
|
151 |
+
|
152 |
+
// Backward pass of altcorr_forward_kernel: accumulates the gradient of the
// correlation output into fmap1_grad and fmap2_grad.
//
// Tensor indexing matches the forward kernel (fmap*: [b][h][w][c], coords:
// [b][n][h][w][2], corr_grad: [b][n][k][h][w]). coords_grad is accepted but
// never written by this kernel.
//
// Must be launched with blockDim = (BLOCK_H, BLOCK_W); the channel count has to
// be a multiple of CHANNEL_STRIDE. fmap2_grad is updated with atomicAdd because
// different pixels may sample the same fmap2 location; each fmap1_grad pixel is
// written by a single block.
template <typename scalar_t>
__global__ void altcorr_backward_kernel(
    const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1,
    const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2,
    const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> coords,
    const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr_grad,
    torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1_grad,
    torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2_grad,
    torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> coords_grad,
    int r)
{

  const int b = blockIdx.x;
  const int h0 = blockIdx.y * blockDim.x;
  const int w0 = blockIdx.z * blockDim.y;
  const int tid = threadIdx.x * blockDim.y + threadIdx.y;

  const int H1 = fmap1.size(1);
  const int W1 = fmap1.size(2);
  const int H2 = fmap2.size(1);
  const int W2 = fmap2.size(2);
  const int N = coords.size(1);
  const int C = fmap1.size(3);

  __shared__ scalar_t f1[CHANNEL_STRIDE][BLOCK_HW+1];
  __shared__ scalar_t f2[CHANNEL_STRIDE][BLOCK_HW+1];

  __shared__ scalar_t f1_grad[CHANNEL_STRIDE][BLOCK_HW+1];
  __shared__ scalar_t f2_grad[CHANNEL_STRIDE][BLOCK_HW+1];

  __shared__ scalar_t x2s[BLOCK_HW];
  __shared__ scalar_t y2s[BLOCK_HW];

  // pixel owned by this thread
  const int h1 = h0 + threadIdx.x;
  const int w1 = w0 + threadIdx.y;
  const bool inside = within_bounds(h1, w1, H1, W1);

  const int rd = 2*r + 1;

  for (int c=0; c<C; c+=CHANNEL_STRIDE) {

    // stage one channel chunk of the fmap1 tile and clear its gradient buffer
    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
      const int k1 = k + tid / CHANNEL_STRIDE;
      const int ph = h0 + k1 / BLOCK_W;
      const int pw = w0 + k1 % BLOCK_W;
      const int c1 = tid % CHANNEL_STRIDE;

      if (within_bounds(ph, pw, H1, W1))
        f1[c1][k1] = fmap1[b][ph][pw][c+c1];
      else
        f1[c1][k1] = 0.0;

      f1_grad[c1][k1] = 0.0;
    }

    __syncthreads();

    for (int n=0; n<N; n++) {
      // other threads may still be reading x2s / y2s in the scatter phase of
      // the previous n
      __syncthreads();

      // only threads that map to a real pixel may read coords; the rest
      // publish zeros (their contribution is zero anyway)
      if (inside) {
        x2s[tid] = coords[b][n][h1][w1][0];
        y2s[tid] = coords[b][n][h1][w1][1];
      } else {
        x2s[tid] = 0.0;
        y2s[tid] = 0.0;
      }

      __syncthreads();

      // fractional part of the sampling position (bilinear weights)
      const scalar_t dx = x2s[tid] - floor(x2s[tid]);
      const scalar_t dy = y2s[tid] - floor(y2s[tid]);

      for (int iy=0; iy<rd+1; iy++) {
        for (int ix=0; ix<rd+1; ix++) {
          // gather fmap2 at integer offset (iy, ix) and clear its gradient buffer
          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
            const int k1 = k + tid / CHANNEL_STRIDE;
            const int h2 = static_cast<int>(floor(y2s[k1]))-r+iy;
            const int w2 = static_cast<int>(floor(x2s[k1]))-r+ix;
            const int c2 = tid % CHANNEL_STRIDE;

            if (within_bounds(h2, w2, H2, W2))
              f2[c2][k1] = fmap2[b][h2][w2][c+c2];
            else
              f2[c2][k1] = 0.0;

            f2_grad[c2][k1] = 0.0;
          }

          __syncthreads();

          // gradient arriving at this sample from the (up to) four output
          // cells it contributed to in the forward pass
          scalar_t g = 0.0;

          const int ix_nw = H1*W1*((iy-1) + rd*(ix-1));
          const int ix_ne = H1*W1*((iy-1) + rd*ix);
          const int ix_sw = H1*W1*(iy + rd*(ix-1));
          const int ix_se = H1*W1*(iy + rd*ix);

          if (inside) {
            const scalar_t* grad_ptr = &corr_grad[b][n][0][h1][w1];

            if (iy > 0 && ix > 0)
              g += *(grad_ptr + ix_nw) * dy * dx;

            if (iy > 0 && ix < rd)
              g += *(grad_ptr + ix_ne) * dy * (1-dx);

            if (iy < rd && ix > 0)
              g += *(grad_ptr + ix_sw) * (1-dy) * dx;

            if (iy < rd && ix < rd)
              g += *(grad_ptr + ix_se) * (1-dy) * (1-dx);
          }

          for (int k=0; k<CHANNEL_STRIDE; k++) {
            f1_grad[k][tid] += g * f2[k][tid];
            f2_grad[k][tid] += g * f1[k][tid];
          }

          // f2_grad was written per pixel (column tid) and is read per channel
          // (row c2) below, so all writes must be visible first
          __syncthreads();

          // scatter the fmap2 gradient back to the sampled locations
          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
            const int k1 = k + tid / CHANNEL_STRIDE;
            const int h2 = static_cast<int>(floor(y2s[k1]))-r+iy;
            const int w2 = static_cast<int>(floor(x2s[k1]))-r+ix;
            const int c2 = tid % CHANNEL_STRIDE;

            if (within_bounds(h2, w2, H2, W2))
              atomicAdd(&fmap2_grad[b][h2][w2][c+c2], f2_grad[c2][k1]);
          }
        }
      }
    }
    __syncthreads();

    // write the accumulated fmap1 gradient for this channel chunk
    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
      const int k1 = k + tid / CHANNEL_STRIDE;
      const int ph = h0 + k1 / BLOCK_W;
      const int pw = w0 + k1 % BLOCK_W;
      const int c1 = tid % CHANNEL_STRIDE;

      if (within_bounds(ph, pw, H1, W1))
        fmap1_grad[b][ph][pw][c+c1] += f1_grad[c1][k1];
    }
  }
}
|
287 |
+
|
288 |
+
|
289 |
+
|
290 |
+
// Host entry point for the forward correlation.
//
// fmap1 / fmap2: feature maps indexed [B][H][W][C] by the kernel.
// coords:        float tensor of shape (B, N, H, W, 2).
// radius:        half-width r of the correlation window.
//
// Returns a single tensor of shape (B, N, (2r+1)^2, H, W) with the dtype and
// device of fmap1.
std::vector<torch::Tensor> altcorr_cuda_forward(
  torch::Tensor fmap1,
  torch::Tensor fmap2,
  torch::Tensor coords,
  int radius)
{
  // the kernel walks the channels in chunks of CHANNEL_STRIDE without a tail
  // check; any other channel count would read past the end of the feature maps
  TORCH_CHECK(fmap1.size(3) % CHANNEL_STRIDE == 0,
      "altcorr_cuda_forward: channel count (", fmap1.size(3),
      ") must be a multiple of ", CHANNEL_STRIDE);

  const auto B = coords.size(0);
  const auto N = coords.size(1);
  const auto H = coords.size(2);
  const auto W = coords.size(3);

  const auto rd = 2 * radius + 1;
  auto opts = fmap1.options();
  auto corr = torch::zeros({B, N, rd*rd, H, W}, opts);

  // one block per batch entry and BLOCK_H x BLOCK_W tile of pixels
  const dim3 blocks(B, (H+BLOCK_H-1)/BLOCK_H, (W+BLOCK_W-1)/BLOCK_W);
  const dim3 threads(BLOCK_H, BLOCK_W);

  // scalar_type() replaces the deprecated Tensor::type() in the dispatch macro
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(fmap1.scalar_type(), "altcorr_forward_kernel", ([&] {
    altcorr_forward_kernel<scalar_t><<<blocks, threads>>>(
      fmap1.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),
      fmap2.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),
      coords.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
      corr.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
      radius);
  }));

  return {corr};
}
|
320 |
+
|
321 |
+
// Host entry point for the backward correlation (float32 only: the kernel is
// instantiated for float and the accessors request float data).
//
// Returns {fmap1_grad, fmap2_grad, coords_grad}. coords_grad is allocated as
// zeros of shape (B, N, H1, W1, 2); the kernel does not write to it.
std::vector<torch::Tensor> altcorr_cuda_backward(
  torch::Tensor fmap1,
  torch::Tensor fmap2,
  torch::Tensor coords,
  torch::Tensor corr_grad,
  int radius)
{
  // the kernel walks the channels in chunks of CHANNEL_STRIDE without a tail
  // check; any other channel count would write past the end of the gradients
  TORCH_CHECK(fmap1.size(3) % CHANNEL_STRIDE == 0,
      "altcorr_cuda_backward: channel count (", fmap1.size(3),
      ") must be a multiple of ", CHANNEL_STRIDE);

  const auto B = coords.size(0);
  const auto N = coords.size(1);

  const auto H1 = fmap1.size(1);
  const auto W1 = fmap1.size(2);
  const auto H2 = fmap2.size(1);
  const auto W2 = fmap2.size(2);
  const auto C = fmap1.size(3);

  auto opts = fmap1.options();
  auto fmap1_grad = torch::zeros({B, H1, W1, C}, opts);
  auto fmap2_grad = torch::zeros({B, H2, W2, C}, opts);
  auto coords_grad = torch::zeros({B, N, H1, W1, 2}, opts);

  // one block per batch entry and BLOCK_H x BLOCK_W tile of fmap1 pixels
  const dim3 blocks(B, (H1+BLOCK_H-1)/BLOCK_H, (W1+BLOCK_W-1)/BLOCK_W);
  const dim3 threads(BLOCK_H, BLOCK_W);

  altcorr_backward_kernel<float><<<blocks, threads>>>(
    fmap1.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
    fmap2.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
    coords.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
    corr_grad.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
    fmap1_grad.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
    fmap2_grad.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
    coords_grad.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
    radius);

  return {fmap1_grad, fmap2_grad, coords_grad};
}
|
thirdparty/DROID-SLAM/src/correlation_kernels.cu
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include <torch/extension.h>
|
2 |
+
#include <cuda.h>
|
3 |
+
#include <cuda_runtime.h>
|
4 |
+
#include <vector>
|
5 |
+
#include <cuda_fp16.h>
|
6 |
+
#include <cuda_runtime.h>
|
7 |
+
|
8 |
+
|
9 |
+
#include <ATen/ATen.h>
|
10 |
+
#include <ATen/NativeFunctions.h>
|
11 |
+
#include <ATen/Parallel.h>
|
12 |
+
|
13 |
+
#define BLOCK 16
|
14 |
+
|
15 |
+
// True when (h, w) is a valid index into an H x W grid.
__forceinline__ __device__ bool within_bounds(int h, int w, int H, int W) {
  const bool row_ok = (0 <= h) && (h < H);
  const bool col_ok = (0 <= w) && (w < W);
  return row_ok && col_ok;
}
|
18 |
+
|
19 |
+
// Bilinear lookup of a (2r+1) x (2r+1) window from a correlation volume.
//
// volume is indexed [n][y][x][y1][x1], coords [n][{0: x, 1: y}][y][x] and the
// output corr [n][i][j][y][x], where i is the window offset along x and j the
// offset along y. One thread handles one (x, y) location of batch entry
// blockIdx.z and is the only writer of its output cells.
template <typename scalar_t>
__global__ void corr_index_forward_kernel(
    const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> volume,
    const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords,
    torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr,
    int r)
{
  const int n = blockIdx.z;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;
  const int x = blockIdx.x * blockDim.x + threadIdx.x;

  const int h1 = volume.size(1);
  const int w1 = volume.size(2);
  const int h2 = volume.size(3);
  const int w2 = volume.size(4);

  // threads of the padding region of the launch grid have nothing to do
  if (!within_bounds(y, x, h1, w1))
    return;

  const float x0 = coords[n][0][y][x];
  const float y0 = coords[n][1][y][x];

  // fractional part of the lookup centre
  const float dx = x0 - floor(x0);
  const float dy = y0 - floor(y0);

  // integer position of the first sample of the window
  const int x_base = static_cast<int>(floor(x0)) - r;
  const int y_base = static_cast<int>(floor(y0)) - r;

  // the four bilinear weights, constant over the whole window
  const scalar_t w_hi_hi = scalar_t(dx * dy);
  const scalar_t w_hi_lo = scalar_t(dx * (1.0f-dy));
  const scalar_t w_lo_hi = scalar_t((1.0f-dx) * dy);
  const scalar_t w_lo_lo = scalar_t((1.0f-dx) * (1.0f-dy));

  const int rd = 2*r + 1;

  // rd+1 samples per axis: each sample feeds up to four neighbouring cells
  for (int i = 0; i <= rd; ++i) {
    const int x1 = x_base + i;

    for (int j = 0; j <= rd; ++j) {
      const int y1 = y_base + j;

      if (!within_bounds(y1, x1, h2, w2))
        continue;

      const scalar_t s = volume[n][y][x][y1][x1];

      if (i > 0 && j > 0)
        corr[n][i-1][j-1][y][x] += s * w_hi_hi;

      if (i > 0 && j < rd)
        corr[n][i-1][j][y][x] += s * w_hi_lo;

      if (i < rd && j > 0)
        corr[n][i][j-1][y][x] += s * w_lo_hi;

      if (i < rd && j < rd)
        corr[n][i][j][y][x] += s * w_lo_lo;
    }
  }
}
|
71 |
+
|
72 |
+
|
73 |
+
// Backward pass of corr_index_forward_kernel: routes corr_grad back to the
// volume entries that were sampled.
//
// coords is indexed [n][{0: x, 1: y}][y][x], corr_grad [n][i][j][y][x] and
// volume_grad [n][y][x][y1][x1]. One thread handles one (x, y) location of
// batch entry blockIdx.z and is the only writer of volume_grad[n][y][x][..].
template <typename scalar_t>
__global__ void corr_index_backward_kernel(
    const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords,
    const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr_grad,
    torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> volume_grad,
    int r)
{
  const int n = blockIdx.z;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;
  const int x = blockIdx.x * blockDim.x + threadIdx.x;

  const int h1 = volume_grad.size(1);
  const int w1 = volume_grad.size(2);
  const int h2 = volume_grad.size(3);
  const int w2 = volume_grad.size(4);

  // threads of the padding region of the launch grid have nothing to do
  if (!within_bounds(y, x, h1, w1))
    return;

  const float x0 = coords[n][0][y][x];
  const float y0 = coords[n][1][y][x];

  // fractional part of the lookup centre
  const float dx = x0 - floor(x0);
  const float dy = y0 - floor(y0);

  // integer position of the first sample of the window
  const int x_base = static_cast<int>(floor(x0)) - r;
  const int y_base = static_cast<int>(floor(y0)) - r;

  // the four bilinear weights, constant over the whole window
  const scalar_t w_hi_hi = scalar_t(dx * dy);
  const scalar_t w_hi_lo = scalar_t(dx * (1.0f-dy));
  const scalar_t w_lo_hi = scalar_t((1.0f-dx) * dy);
  const scalar_t w_lo_lo = scalar_t((1.0f-dx) * (1.0f-dy));

  const int rd = 2*r + 1;

  for (int i = 0; i <= rd; ++i) {
    const int x1 = x_base + i;

    for (int j = 0; j <= rd; ++j) {
      const int y1 = y_base + j;

      if (!within_bounds(y1, x1, h2, w2))
        continue;

      // sum the gradient of every output cell this sample contributed to
      scalar_t g = 0.0;

      if (i > 0 && j > 0)
        g += corr_grad[n][i-1][j-1][y][x] * w_hi_hi;

      if (i > 0 && j < rd)
        g += corr_grad[n][i-1][j][y][x] * w_hi_lo;

      if (i < rd && j > 0)
        g += corr_grad[n][i][j-1][y][x] * w_lo_hi;

      if (i < rd && j < rd)
        g += corr_grad[n][i][j][y][x] * w_lo_lo;

      volume_grad[n][y][x][y1][x1] += g;
    }
  }
}
|
125 |
+
|
126 |
+
// Host entry point for the correlation-volume lookup.
//
// volume: tensor indexed [batch][ht][wd][y1][x1] by the kernel.
// coords: float tensor indexed [batch][2][ht][wd].
// radius: half-width r of the lookup window.
//
// Returns a single tensor of shape (batch, 2r+1, 2r+1, ht, wd) with the dtype
// and device of volume.
std::vector<torch::Tensor> corr_index_cuda_forward(
    torch::Tensor volume,
    torch::Tensor coords,
    int radius)
{
  const auto batch_size = volume.size(0);
  const auto ht = volume.size(1);
  const auto wd = volume.size(2);

  // BLOCK x BLOCK threads per block, one thread per (x, y) location
  const dim3 blocks((wd + BLOCK - 1) / BLOCK,
                    (ht + BLOCK - 1) / BLOCK,
                    batch_size);

  const dim3 threads(BLOCK, BLOCK);

  auto opts = volume.options();
  torch::Tensor corr = torch::zeros(
    {batch_size, 2*radius+1, 2*radius+1, ht, wd}, opts);

  // scalar_type() replaces the deprecated Tensor::type() in the dispatch macro
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.scalar_type(), "sampler_forward_kernel", ([&] {
    corr_index_forward_kernel<scalar_t><<<blocks, threads>>>(
      volume.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
      coords.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
      corr.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
      radius);
   }));

  return {corr};

}
|
156 |
+
|
157 |
+
// Host entry point for the backward pass of the correlation-volume lookup.
//
// volume is only used for its shape, dtype and device; the result is a single
// tensor shaped like volume holding the gradient with respect to it.
std::vector<torch::Tensor> corr_index_cuda_backward(
  torch::Tensor volume,
  torch::Tensor coords,
  torch::Tensor corr_grad,
  int radius)
{
  const auto batch_size = volume.size(0);
  const auto ht = volume.size(1);
  const auto wd = volume.size(2);

  auto volume_grad = torch::zeros_like(volume);

  // BLOCK x BLOCK threads per block, one thread per (x, y) location
  const dim3 blocks((wd + BLOCK - 1) / BLOCK,
                    (ht + BLOCK - 1) / BLOCK,
                    batch_size);

  const dim3 threads(BLOCK, BLOCK);

  // scalar_type() replaces the deprecated Tensor::type() in the dispatch macro
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.scalar_type(), "sampler_backward_kernel", ([&] {
    corr_index_backward_kernel<scalar_t><<<blocks, threads>>>(
      coords.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
      corr_grad.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
      volume_grad.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
      radius);
   }));

  return {volume_grad};
}
|