Upload 4 files
Browse files- .gitattributes +1 -0
- example_input.jpg +0 -0
- examples.png +3 -0
- genex_world_initializer_pipeline.py +175 -0
- pano_mask.png +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
examples.png filter=lfs diff=lfs merge=lfs -text
|
example_input.jpg
ADDED
![]() |
examples.png
ADDED
![]() |
Git LFS Details
|
genex_world_initializer_pipeline.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from diffusers import FluxFillPipeline
|
3 |
+
from PIL import Image
|
4 |
+
|
5 |
+
class GenExWorldInitializerPipeline(FluxFillPipeline):
    """Flux fill pipeline that grows a single view into an equirectangular panorama.

    The input picture is centre-cropped to a square and used as the ``front``
    face of a cubemap whose other five faces are blank. The cubemap is
    projected to a 2048x1024 equirectangular canvas and handed to
    ``FluxFillPipeline.__call__`` together with a mask that is black over the
    front face and white everywhere else.
    """

    # Edge length, in pixels, of every cube face built by this class.
    _FACE_SIZE = 512
    # (width, height) of the equirectangular canvas passed to the fill model.
    _PANORAMA_SIZE = (2048, 1024)
    # Faces that are left blank around the user-supplied front face.
    _BLANK_FACE_NAMES = ('back', 'left', 'right', 'top', 'bottom')
    # face name -> (dominant axis, sign of that axis,
    #               (axis feeding u, sign), (axis feeding v, sign))
    # Axes are indexed 0 = x, 1 = y, 2 = z in the rotated direction frame.
    _FACE_LAYOUT = {
        'right': (0, 1, (2, -1), (1, 1)),
        'left': (0, -1, (2, 1), (1, 1)),
        'bottom': (1, 1, (0, 1), (2, -1)),
        'top': (1, -1, (0, 1), (2, 1)),
        'front': (2, 1, (0, 1), (1, 1)),
        'back': (2, -1, (0, -1), (1, 1)),
    }

    def precompute_rotation_matrix(self, rx, ry, rz):
        """Return the 3x3 rotation matrix ``Rz @ Ry @ Rx``.

        Args:
            rx: Rotation about the x axis, in degrees.
            ry: Rotation about the y axis, in degrees.
            rz: Rotation about the z axis, in degrees.

        Returns:
            A ``(3, 3)`` NumPy array; the x rotation is applied first, then y,
            then z.
        """
        rx, ry, rz = np.deg2rad(rx), np.deg2rad(ry), np.deg2rad(rz)

        Rx = np.array([
            [1, 0, 0],
            [0, np.cos(rx), -np.sin(rx)],
            [0, np.sin(rx), np.cos(rx)],
        ])
        Ry = np.array([
            [np.cos(ry), 0, np.sin(ry)],
            [0, 1, 0],
            [-np.sin(ry), 0, np.cos(ry)],
        ])
        Rz = np.array([
            [np.cos(rz), -np.sin(rz), 0],
            [np.sin(rz), np.cos(rz), 0],
            [0, 0, 1],
        ])

        return Rz @ Ry @ Rx

    @staticmethod
    def _face_to_rgb_array(face_image):
        """Return *face_image* as an ``(H, W, 3)`` array.

        PIL images are converted to RGB so that palette, grayscale and RGBA
        inputs all work; raw arrays get the equivalent channel fix-up.
        """
        if isinstance(face_image, Image.Image):
            return np.asarray(face_image.convert("RGB"))

        pixels = np.asarray(face_image)
        if pixels.ndim == 2:
            # Grayscale array: replicate the single channel.
            pixels = np.stack([pixels] * 3, axis=-1)
        elif pixels.ndim == 3 and pixels.shape[2] == 4:
            # Drop the alpha channel.
            pixels = pixels[..., :3]
        if pixels.ndim != 3 or pixels.shape[2] != 3:
            raise ValueError(
                f"cube face must be an RGB image, got array of shape {pixels.shape}"
            )
        return pixels

    def _blank_cubemap(self, front):
        """Return a six-face cubemap dict with *front* in front and white elsewhere."""
        size = (self._FACE_SIZE, self._FACE_SIZE)
        cubes = {'front': front}
        for name in self._BLANK_FACE_NAMES:
            cubes[name] = Image.new("RGB", size, (255, 255, 255))
        return cubes

    def cubemap_to_equirectangular(self, cubemap_faces, output_width, output_height, scale_factor=2):
        """Project cubemap faces onto an equirectangular image.

        Args:
            cubemap_faces: Mapping from face name (``'front'``, ``'back'``,
                ``'left'``, ``'right'``, ``'top'``, ``'bottom'``) to a PIL
                image or array. Faces missing from the mapping are left black.
            output_width: Width of the returned image in pixels.
            output_height: Height of the returned image in pixels.
            scale_factor: Integer supersampling factor. The projection is
                rendered at ``scale_factor`` times the output size with
                nearest-pixel lookup and, when greater than 1, reduced back
                with Lanczos resampling.

        Returns:
            A ``PIL.Image.Image`` in RGB mode of size
            ``(output_width, output_height)``.

        Raises:
            ValueError: If ``cubemap_faces`` contains an unknown face name or
                a face that cannot be interpreted as an RGB image.
        """
        scaled_width = output_width * scale_factor
        scaled_height = output_height * scale_factor

        # Fixed re-orientation of the viewing sphere; with these angles the
        # centre of the panorama looks along +z, i.e. at the 'front' face.
        R = self.precompute_rotation_matrix(90, -90, 180)

        x = np.linspace(0, scaled_width - 1, scaled_width)
        y = np.linspace(0, scaled_height - 1, scaled_height)
        xv, yv = np.meshgrid(x, y)

        # Longitude in [-pi, pi) across the width, latitude in [-pi/2, pi/2)
        # down the height.
        theta = (xv / scaled_width) * 2 * np.pi - np.pi
        phi = (yv / scaled_height) * np.pi - (np.pi / 2)

        # Unit view direction per output pixel, rotated into the cube frame.
        directions = np.stack([
            np.cos(phi) * np.cos(theta),
            np.cos(phi) * np.sin(theta),
            np.sin(phi),
        ])
        rotated = (R @ directions.reshape(3, -1)).reshape(3, scaled_height, scaled_width)
        axes = (rotated[0], rotated[1], rotated[2])
        magnitudes = tuple(np.abs(axis_values) for axis_values in axes)

        # The axis with the largest magnitude decides which pair of opposite
        # faces a ray hits; the sign of that axis picks the face.
        face_indices = np.argmax(np.stack(magnitudes, axis=-1), axis=-1)

        equirectangular_pixels = np.zeros((scaled_height, scaled_width, 3), dtype=np.uint8)

        for face_name, face_image in cubemap_faces.items():
            layout = self._FACE_LAYOUT.get(face_name)
            if layout is None:
                # Previously an unknown name silently reused the previous
                # face's mask (or raised NameError on the first iteration).
                raise ValueError(
                    f"unknown cubemap face {face_name!r}; "
                    f"expected one of {sorted(self._FACE_LAYOUT)}"
                )
            axis, sign, (u_axis, u_sign), (v_axis, v_sign) = layout

            dominant = axes[axis]
            facing = dominant > 0 if sign > 0 else dominant < 0
            mask = (face_indices == axis) & facing

            # Perspective divide onto the face plane, then map [-1, 1] -> [0, 1].
            depth = magnitudes[axis][mask]
            u = (u_sign * axes[u_axis][mask] / depth + 1) / 2
            v = (v_sign * axes[v_axis][mask] / depth + 1) / 2

            face_pixels = self._face_to_rgb_array(face_image)
            face_height, face_width = face_pixels.shape[:2]
            u_pixel = np.clip((u * face_width).astype(int), 0, face_width - 1)
            v_pixel = np.clip((v * face_height).astype(int), 0, face_height - 1)

            # Boolean-mask assignment visits pixels in the same row-major
            # order that produced u and v.
            equirectangular_pixels[mask] = face_pixels[v_pixel, u_pixel]

        equirectangular_image = Image.fromarray(equirectangular_pixels)

        if scale_factor > 1:
            equirectangular_image = equirectangular_image.resize(
                (output_width, output_height), Image.LANCZOS
            )

        return equirectangular_image

    def preprocess_image(self, image: Image.Image) -> tuple:
        """Build the front cube face and the panorama canvas for *image*.

        Args:
            image: Source picture of any size or mode; it is converted to RGB
                so that grayscale, palette and RGBA inputs are accepted.

        Returns:
            A ``(front, input_panorama)`` tuple: the centre-cropped square
            resized to the cube-face size, and the equirectangular image with
            that face projected in and the other five faces white.
        """
        image = image.convert("RGB")

        w, h = image.size
        side = min(w, h)
        left = (w - side) // 2
        top = (h - side) // 2
        square = image.crop((left, top, left + side, top + side))
        front = square.resize((self._FACE_SIZE, self._FACE_SIZE))

        pano_width, pano_height = self._PANORAMA_SIZE
        input_panorama = self.cubemap_to_equirectangular(
            self._blank_cubemap(front), pano_width, pano_height, scale_factor=2
        )

        return front, input_panorama

    def preprocess_mask(self) -> Image.Image:
        """Load ``pano_mask.png`` as a grayscale mask at panorama size.

        The path is resolved against the current working directory. This
        method is not used by ``__call__``, which builds its mask with
        ``create_mask`` instead.
        """
        with Image.open("pano_mask.png") as mask_file:
            mask = mask_file.convert("L")
        return mask.resize(self._PANORAMA_SIZE)

    def create_mask(self) -> Image.Image:
        """Return the fill mask: black over the front face, white elsewhere.

        The mask is rendered without supersampling (``scale_factor=1``) and
        returned as a mode ``"L"`` image at panorama size.
        """
        size = (self._FACE_SIZE, self._FACE_SIZE)
        cubes = self._blank_cubemap(Image.new("RGB", size, (0, 0, 0)))

        pano_width, pano_height = self._PANORAMA_SIZE
        mask = self.cubemap_to_equirectangular(cubes, pano_width, pano_height, scale_factor=1)

        return mask.convert("L")

    def __call__(
        self,
        image: Image.Image,
        prompt: str = None,
        guidance_scale: float = 3.5,
        **kwargs,
    ):
        """Generate a panorama around *image*.

        Args:
            image: Picture to use as the front view.
            prompt: Optional text appended to the fixed prefix
                ``'GenEx Panoramic World Initialization'``.
            guidance_scale: Forwarded to ``FluxFillPipeline.__call__``.
            **kwargs: Any further ``FluxFillPipeline.__call__`` options
                (for example ``num_inference_steps`` or ``generator``).

        Returns:
            A ``(front, output)`` tuple where ``front`` is the preprocessed
            front face and ``output`` is whatever
            ``FluxFillPipeline.__call__`` returns.

        Raises:
            TypeError: If ``kwargs`` tries to override ``mask_image``,
                ``width`` or ``height``, which this pipeline sets itself.
        """
        fixed = sorted({'mask_image', 'width', 'height'} & kwargs.keys())
        if fixed:
            raise TypeError(
                f"{type(self).__name__} sets {', '.join(fixed)} itself; "
                "remove these arguments"
            )

        front, img = self.preprocess_image(image)
        mask = self.create_mask()

        if prompt:
            prompt = 'GenEx Panoramic World Initialization, ' + prompt
        else:
            prompt = 'GenEx Panoramic World Initialization'

        pano_width, pano_height = self._PANORAMA_SIZE
        return front, super().__call__(
            prompt=prompt,
            image=img,
            mask_image=mask,
            guidance_scale=guidance_scale,
            width=pano_width,
            height=pano_height,
            **kwargs,
        )
|
pano_mask.png
ADDED
![]() |