Spaces: tight-inversion · Running on Zero
Commit 10d3d92 · 1 Parent(s): 577e44b
Fix encoding
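Context for the diff below: this Space runs on ZeroGPU, where CUDA is available only inside functions decorated with @spaces.GPU; the decorator attaches a GPU for at most `duration` seconds per call. A minimal sketch of that pattern (the function and tensor below are illustrative placeholders, not code from this repo):

import spaces
import torch

@spaces.GPU(duration=60)  # a GPU is attached only for the duration of this call
def run_on_gpu(x: torch.Tensor) -> torch.Tensor:
    # Illustrative GPU-side work; on ZeroGPU, .to("cuda") outside a
    # @spaces.GPU-decorated function would find no GPU attached.
    return (x.to("cuda") * 2.0).cpu()

Consistent with this, the commit inlines the VAE encode into the @spaces.GPU-decorated generate_image, removes the separate encode_image_to_latents helper, and lowers the requested duration from 90 to 60 seconds.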
app.py CHANGED
@@ -28,7 +28,6 @@ def get_models(name: str, device: torch.device, offload: bool, fp8: bool):
     ae = load_ae(name, device=device)
     return model, ae, t5, clip
 
-
 class FluxGenerator:
     def __init__(self, model_name: str, device: str, offload: bool, aggressive_offload: bool, args):
         self.device = torch.device(device)
@@ -44,47 +43,7 @@ class FluxGenerator:
         self.pulid_model = PuLIDPipeline(self.model, device='cuda', weight_dtype=torch.bfloat16)
         self.pulid_model.load_pretrain(args.pretrained_model)
 
-
-    def encode_image_to_latents(self, img, opts):
-        """
-        Opposite of decode: Takes a PIL image and encodes it into latents (x).
-        """
-        t0 = time.perf_counter()
-
-        # Resize if necessary, or use opts.height / opts.width if you want a fixed size:
-        img = img.resize((opts.width, opts.height), resample=Image.LANCZOS)
-
-        # Convert image to torch.Tensor and scale to [-1, 1]
-        # Image is in [0, 255] → scale to [0,1] → then map to [-1,1].
-        x = np.array(img).astype(np.float32)
-        x = torch.from_numpy(x)  # shape: (H, W, C)
-        x = (x / 127.5) - 1.0  # now in [-1, 1]
-        x = rearrange(x, "h w c -> 1 c h w")  # shape: (1, C, H, W)
-
-        # Move encoder to device if you are offloading
-        if self.offload:
-            self.ae.encoder.to(self.device)
-
-        x = x.to(self.device)
-
-        # 2) Encode with autocast
-        with torch.autocast(device_type=self.device.type, dtype=torch.bfloat16):
-            x = self.ae.encode(x)
-
-        x = x.to(torch.bfloat16)
-
-
-        # 3) Offload if needed
-        if self.offload:
-            self.ae.encoder.cpu()
-            torch.cuda.empty_cache()
-
-        t1 = time.perf_counter()
-        print(f"Encoded in {t1 - t0:.2f} seconds.")
-
-        return x
-
-    @spaces.GPU(duration=90)
+    @spaces.GPU(duration=60)
     @torch.inference_mode()
     def generate_image(
         self,
@@ -153,8 +112,31 @@ class FluxGenerator:
         noise = rearrange(noise, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
         if noise.shape[0] == 1 and bs > 1:
             noise = repeat(noise, "1 ... -> bs ...", bs=bs)
-        #
-
+        # Encode id_image directly here
+        encode_t0 = time.perf_counter()
+
+        # Resize image
+        id_image = id_image.resize((opts.width, opts.height), resample=Image.LANCZOS)
+
+        # Convert image to torch.Tensor and scale to [-1, 1]
+        x = np.array(id_image).astype(np.float32)
+        x = torch.from_numpy(x)  # shape: (H, W, C)
+        x = (x / 127.5) - 1.0  # now in [-1, 1]
+        x = rearrange(x, "h w c -> 1 c h w")  # shape: (1, C, H, W)
+        x = x.to(self.device)
+        # Encode with autocast
+        with torch.autocast(device_type=self.device.type, dtype=torch.bfloat16):
+            x = self.ae.encode(x)
+
+        x = x.to(torch.bfloat16)
+
+        # Offload if needed
+        if self.offload:
+            self.ae.encoder.to("cpu")
+            torch.cuda.empty_cache()
+
+        encode_t1 = time.perf_counter()
+        print(f"Encoded in {encode_t1 - encode_t0:.2f} seconds.")
 
         timesteps = get_schedule(opts.num_steps, x.shape[-1] * x.shape[-2] // 4, shift=False)
 
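For reference, the inlined block above is a standard PIL-to-latents conversion. A self-contained sketch of the same steps (the helper name encode_to_latents is hypothetical; ae is assumed to be an autoencoder exposing encode(), as load_ae provides in this codebase):

import numpy as np
import torch
from einops import rearrange
from PIL import Image

def encode_to_latents(ae, img: Image.Image, width: int, height: int,
                      device: torch.device) -> torch.Tensor:
    # Resize to the generation resolution.
    img = img.resize((width, height), resample=Image.LANCZOS)
    # uint8 (H, W, C) in [0, 255] -> float32 in [-1, 1].
    x = torch.from_numpy(np.array(img).astype(np.float32)) / 127.5 - 1.0
    # (H, W, C) -> (1, C, H, W), then move to the target device.
    x = rearrange(x, "h w c -> 1 c h w").to(device)
    # Encode under bf16 autocast, as in the diff.
    with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
        x = ae.encode(x)
    return x.to(torch.bfloat16)

After this block, x holds image latents rather than noise, so the get_schedule call that follows sizes the timestep schedule from the latent spatial dimensions (x.shape[-1] * x.shape[-2] // 4).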