Update README.md
README.md (CHANGED)
@@ -25,6 +25,8 @@ I will be slowly releasing parts of Beatrix in training diagrams and stage the m

 Because I really don't know why Beatrix works the way she does, and I'm not going to just release something like that until I understand WHY it skips and hops past entropy.

+77 tokens - not 64: there's no need to upscale the most recent 77-token version; it's built to the same plane as CLIP_L now.
+

 ```
 def main():
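To ground the new 77-token note above, here is a tiny, hypothetical shape check. It assumes the RobustVelocityAdapter class shown later in this README and feeds a random tensor in place of real T5 encoder output; it is not part of the repo's scripts.

```
import torch

# Assumption: RobustVelocityAdapter is the class defined further down in this README.
adapter = RobustVelocityAdapter(t5_dim=512, clip_dim=768, out_tokens=77).eval()

t5_seq = torch.randn(1, 77, 512)            # stand-in for T5 last_hidden_state
with torch.no_grad():
    anchor, delta, sigma = adapter(t5_seq)

# With out_tokens=77 every output already sits on CLIP_L's [1, 77, 768] plane,
# which is why no upscaling pass is needed anymore.
assert anchor.shape == delta.shape == sigma.shape == (1, 77, 768)
```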
@@ -98,8 +100,8 @@ def main():

     # 3) Forward + loss in mixed precision
     with autocast():
-        t5_seq = t5_mod(**t5_inputs).last_hidden_state  # [B,
-        clip_seq = clip_mod(**clip_inputs).last_hidden_state  # [B,
+        t5_seq = t5_mod(**t5_inputs).last_hidden_state  # [B,77,512]
+        clip_seq = clip_mod(**clip_inputs).last_hidden_state  # [B,77,768]

         anchor_pred, delta_pred, sigma_pred = adapter(t5_seq)
         delta_target = clip_seq - anchor_pred
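The hunk above stops before the training loss itself, so nothing here states what Beatrix actually optimizes. Purely as an orientation example of how a (delta_pred, delta_target, sigma_pred) triple can be scored, a plain Gaussian negative log-likelihood sketch follows; the function name and the choice of objective are assumptions, not the repo's code.

```
import torch
import torch.nn.functional as F

def example_delta_nll(delta_pred, delta_target, sigma_pred):
    # Treat sigma_pred as a per-token standard deviation; clamp so the variance stays positive.
    var = sigma_pred.clamp(min=1e-4) ** 2
    return F.gaussian_nll_loss(delta_pred, delta_target, var, reduction="mean")

# Shapes mirror the comments in the hunk above: [B, 77, 768] on the CLIP side.
B = 4
loss = example_delta_nll(torch.randn(B, 77, 768),
                         torch.randn(B, 77, 768),
                         torch.rand(B, 77, 768))
```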
@@ -213,7 +215,7 @@ class RobustVelocityAdapter(nn.Module):
         t5_dim: int = 512,
         clip_dim: int = 768,
         hidden_dim: int = 1024,
-        out_tokens: int =
+        out_tokens: int = 77,  # now aligned with your T5 finetune
         self_attn_layers: int = 2,
         cross_heads: int = 8,
         max_rel_pos: int = 128,
@@ -394,7 +396,7 @@ t5_mod = T5EncoderModel.from_pretrained(
 ).to(DEVICE).eval()

 # 1d) velocity prediction adapter
-adapter = RobustVelocityAdapter(out_tokens=
+adapter = RobustVelocityAdapter(out_tokens=77).to(DEVICE).eval()
 state = load_safetensors(local_adapter_directory, device="cpu")
 clean = {k.replace("_orig_mod.", ""): v for k, v in state.items()}
 adapter.load_state_dict(clean, strict=False)
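A brief aside on the checkpoint-loading lines in the hunk above: the "_orig_mod." prefix is what torch.compile() adds to state_dict keys, and strict=False silently skips any mismatch, so it can be worth printing what was ignored. The sketch below assumes the adapter instance from the hunk and uses a placeholder file path.

```
from safetensors.torch import load_file as load_safetensors

# Placeholder path; the repo passes local_adapter_directory here.
state = load_safetensors("adapter.safetensors", device="cpu")

# Checkpoints saved from a torch.compile()'d model carry an "_orig_mod." prefix;
# stripping it makes the keys line up with the plain, uncompiled module.
clean = {k.replace("_orig_mod.", ""): v for k, v in state.items()}

missing, unexpected = adapter.load_state_dict(clean, strict=False)
print("missing:", missing)        # parameters left at their init values
print("unexpected:", unexpected)  # checkpoint keys that matched nothing
```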
@@ -428,12 +430,12 @@ def generate_image_with_adapter(
                          return_tensors="pt").to(DEVICE)
     clip_uncond = clip_mod(**empty_in).last_hidden_state  # [1,77,768]

-    # 2.2) T5 → adapter → anchor, delta, sigma (
+    # 2.2) T5 → adapter → anchor, delta, sigma (77 tokens)
     t5_in = t5_tok(prompt,
-                   max_length=
+                   max_length=77, padding="max_length",
                    truncation=True, return_tensors="pt").to(DEVICE)
-    t5_seq = t5_mod(**t5_in).last_hidden_state  # [1,
-    anchor, delta, sigma = adapter(t5_seq)      # each [1,
+    t5_seq = t5_mod(**t5_in).last_hidden_state  # [1,77,512]
+    anchor, delta, sigma = adapter(t5_seq)      # each [1,77,768]

     # 2.3) Upsample to 77 tokens
     T_clip = clip_cond.shape[1]  # 77
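Because the adapter now emits 77 tokens natively, the "2.3) Upsample to 77 tokens" step only does real work for older checkpoints that produce a shorter sequence (the README mentions 64). A minimal, assumed helper for that length-matching guard, operating on [B, T, D] tensors; the function name is invented here:

```
import torch.nn.functional as F

def match_clip_length(seq, T_clip=77):
    # seq: [B, T, D]. With the 77-token adapter this is a no-op;
    # a 64-token output would be linearly resampled along the token axis.
    if seq.shape[1] == T_clip:
        return seq
    seq = seq.transpose(1, 2)                                  # [B, D, T] for 1-D interpolation
    seq = F.interpolate(seq, size=T_clip, mode="linear", align_corners=True)
    return seq.transpose(1, 2)                                 # back to [B, T_clip, D]
```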