Update README.md
README.md (CHANGED)
@@ -25,6 +25,8 @@ I will be slowly releasing parts of Beatrix in training diagrams and stage the m

 Because I really don't know why Beatrix works the way she does, and I'm not going to just release something like that until I understand WHY it skips and hops past entropy.

+77 tokens - not 64: there's no need to upscale the most recent 77-token version; it's built to the same plane as CLIP_L now.
+

 ```
 def main():
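To ground the new 77-token note above, here is a tiny, hypothetical shape check. It assumes the RobustVelocityAdapter class shown later in this README and feeds a random tensor in place of real T5 encoder output; it is not part of the repo's scripts.

```
import torch

# Assumption: RobustVelocityAdapter is the class defined further down in this README.
adapter = RobustVelocityAdapter(t5_dim=512, clip_dim=768, out_tokens=77).eval()

t5_seq = torch.randn(1, 77, 512)            # stand-in for T5 last_hidden_state
with torch.no_grad():
    anchor, delta, sigma = adapter(t5_seq)

# With out_tokens=77 every output already sits on CLIP_L's [1, 77, 768] plane,
# which is why no upscaling pass is needed anymore.
assert anchor.shape == delta.shape == sigma.shape == (1, 77, 768)
```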
@@ -98,8 +100,8 @@ def main():

     # 3) Forward + loss in mixed precision
     with autocast():
-        t5_seq = t5_mod(**t5_inputs).last_hidden_state  # [B,
-        clip_seq = clip_mod(**clip_inputs).last_hidden_state  # [B,
+        t5_seq = t5_mod(**t5_inputs).last_hidden_state  # [B,77,512]
+        clip_seq = clip_mod(**clip_inputs).last_hidden_state  # [B,77,768]

         anchor_pred, delta_pred, sigma_pred = adapter(t5_seq)
         delta_target = clip_seq - anchor_pred
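The hunk above stops before the training loss itself, so nothing here states what Beatrix actually optimizes. Purely as an orientation example of how a (delta_pred, delta_target, sigma_pred) triple can be scored, a plain Gaussian negative log-likelihood sketch follows; the function name and the choice of objective are assumptions, not the repo's code.

```
import torch
import torch.nn.functional as F

def example_delta_nll(delta_pred, delta_target, sigma_pred):
    # Treat sigma_pred as a per-token standard deviation; clamp so the variance stays positive.
    var = sigma_pred.clamp(min=1e-4) ** 2
    return F.gaussian_nll_loss(delta_pred, delta_target, var, reduction="mean")

# Shapes mirror the comments in the hunk above: [B, 77, 768] on the CLIP side.
B = 4
loss = example_delta_nll(torch.randn(B, 77, 768),
                         torch.randn(B, 77, 768),
                         torch.rand(B, 77, 768))
```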
@@ -213,7 +215,7 @@ class RobustVelocityAdapter(nn.Module):
         t5_dim: int = 512,
         clip_dim: int = 768,
         hidden_dim: int = 1024,
-        out_tokens: int =
+        out_tokens: int = 77,  # now aligned with your T5 finetune
         self_attn_layers: int = 2,
         cross_heads: int = 8,
         max_rel_pos: int = 128,
@@ -394,7 +396,7 @@ t5_mod = T5EncoderModel.from_pretrained(
 ).to(DEVICE).eval()

 # 1d) velocity prediction adapter
-adapter = RobustVelocityAdapter(out_tokens=
+adapter = RobustVelocityAdapter(out_tokens=77).to(DEVICE).eval()
 state = load_safetensors(local_adapter_directory, device="cpu")
 clean = {k.replace("_orig_mod.", ""): v for k, v in state.items()}
 adapter.load_state_dict(clean, strict=False)
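A brief aside on the checkpoint-loading lines in the hunk above: the "_orig_mod." prefix is what torch.compile() adds to state_dict keys, and strict=False silently skips any mismatch, so it can be worth printing what was ignored. The sketch below assumes the adapter instance from the hunk and uses a placeholder file path.

```
from safetensors.torch import load_file as load_safetensors

# Placeholder path; the repo passes local_adapter_directory here.
state = load_safetensors("adapter.safetensors", device="cpu")

# Checkpoints saved from a torch.compile()'d model carry an "_orig_mod." prefix;
# stripping it makes the keys line up with the plain, uncompiled module.
clean = {k.replace("_orig_mod.", ""): v for k, v in state.items()}

missing, unexpected = adapter.load_state_dict(clean, strict=False)
print("missing:", missing)        # parameters left at their init values
print("unexpected:", unexpected)  # checkpoint keys that matched nothing
```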
@@ -428,12 +430,12 @@ def generate_image_with_adapter(
                          return_tensors="pt").to(DEVICE)
     clip_uncond = clip_mod(**empty_in).last_hidden_state  # [1,77,768]

-    # 2.2) T5 → adapter → anchor, delta, sigma (
+    # 2.2) T5 → adapter → anchor, delta, sigma (77 tokens)
     t5_in = t5_tok(prompt,
-                   max_length=
+                   max_length=77, padding="max_length",
                    truncation=True, return_tensors="pt").to(DEVICE)
-    t5_seq = t5_mod(**t5_in).last_hidden_state  # [1,
-    anchor, delta, sigma = adapter(t5_seq)      # each [1,
+    t5_seq = t5_mod(**t5_in).last_hidden_state  # [1,77,512]
+    anchor, delta, sigma = adapter(t5_seq)      # each [1,77,768]

     # 2.3) Upsample to 77 tokens
     T_clip = clip_cond.shape[1]  # 77
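Because the adapter now emits 77 tokens natively, the "2.3) Upsample to 77 tokens" step only does real work for older checkpoints that produce a shorter sequence (the README mentions 64). A minimal, assumed helper for that length-matching guard, operating on [B, T, D] tensors; the function name is invented here:

```
import torch.nn.functional as F

def match_clip_length(seq, T_clip=77):
    # seq: [B, T, D]. With the 77-token adapter this is a no-op;
    # a 64-token output would be linearly resampled along the token axis.
    if seq.shape[1] == T_clip:
        return seq
    seq = seq.transpose(1, 2)                                  # [B, D, T] for 1-D interpolation
    seq = F.interpolate(seq, size=T_clip, mode="linear", align_corners=True)
    return seq.transpose(1, 2)                                 # back to [B, T_clip, D]
```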