AbstractPhil committed on
Commit
3cbdb95
·
verified ·
1 Parent(s): 140f873

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -8
README.md CHANGED
@@ -25,6 +25,8 @@ I will be slowly releasing parts of Beatrix in training diagrams and stage the m
25
 
26
  Because I really don't know why Beatrix works the way she does, and I'm not going to just release something like that until I understand WHY it skips and hops past entropy.
27
 
 
 
28
 
29
  ```
30
  def main():
@@ -98,8 +100,8 @@ def main():
98
 
99
  # 3) Forward + loss in mixed precision
100
  with autocast():
101
- t5_seq = t5_mod(**t5_inputs).last_hidden_state # [B,64,512]
102
- clip_seq = clip_mod(**clip_inputs).last_hidden_state # [B,64,768]
103
 
104
  anchor_pred, delta_pred, sigma_pred = adapter(t5_seq)
105
  delta_target = clip_seq - anchor_pred
@@ -213,7 +215,7 @@ class RobustVelocityAdapter(nn.Module):
213
  t5_dim: int = 512,
214
  clip_dim: int = 768,
215
  hidden_dim: int = 1024,
216
- out_tokens: int = 64, # now aligned with your T5 finetune
217
  self_attn_layers: int = 2,
218
  cross_heads: int = 8,
219
  max_rel_pos: int = 128,
@@ -394,7 +396,7 @@ t5_mod = T5EncoderModel.from_pretrained(
394
  ).to(DEVICE).eval()
395
 
396
  # 1d) velocity prediction adapter
397
- adapter = RobustVelocityAdapter(out_tokens=64).to(DEVICE).eval()
398
  state = load_safetensors(local_adapter_directory, device="cpu")
399
  clean = {k.replace("_orig_mod.", ""): v for k, v in state.items()}
400
  adapter.load_state_dict(clean, strict=False)
@@ -428,12 +430,12 @@ def generate_image_with_adapter(
428
  return_tensors="pt").to(DEVICE)
429
  clip_uncond= clip_mod(**empty_in).last_hidden_state # [1,77,768]
430
 
431
- # 2.2) T5 → adapter → anchor, delta, sigma (64 tokens)
432
  t5_in = t5_tok(prompt,
433
- max_length=64, padding="max_length",
434
  truncation=True, return_tensors="pt").to(DEVICE)
435
- t5_seq = t5_mod(**t5_in).last_hidden_state # [1,64,512]
436
- anchor, delta, sigma = adapter(t5_seq) # each [1,64,768]
437
 
438
  # 2.3) Upsample to 77 tokens
439
  T_clip = clip_cond.shape[1] # 77
 
25
 
26
  Because I really don't know why Beatrix works the way she does, and I'm not going to just release something like that until I understand WHY it skips and hops past entropy.
27
 
28
+ 77 tokens — not 64. There's no need to upscale the most recent 77-token version; it's built to the same plane as CLIP-L now.
29
+
30
 
31
  ```
32
  def main():
 
100
 
101
  # 3) Forward + loss in mixed precision
102
  with autocast():
103
+ t5_seq = t5_mod(**t5_inputs).last_hidden_state # [B,77,512]
104
+ clip_seq = clip_mod(**clip_inputs).last_hidden_state # [B,77,768]
105
 
106
  anchor_pred, delta_pred, sigma_pred = adapter(t5_seq)
107
  delta_target = clip_seq - anchor_pred
 
215
  t5_dim: int = 512,
216
  clip_dim: int = 768,
217
  hidden_dim: int = 1024,
218
+ out_tokens: int = 77, # now aligned with your T5 finetune
219
  self_attn_layers: int = 2,
220
  cross_heads: int = 8,
221
  max_rel_pos: int = 128,
 
396
  ).to(DEVICE).eval()
397
 
398
  # 1d) velocity prediction adapter
399
+ adapter = RobustVelocityAdapter(out_tokens=77).to(DEVICE).eval()
400
  state = load_safetensors(local_adapter_directory, device="cpu")
401
  clean = {k.replace("_orig_mod.", ""): v for k, v in state.items()}
402
  adapter.load_state_dict(clean, strict=False)
 
430
  return_tensors="pt").to(DEVICE)
431
  clip_uncond= clip_mod(**empty_in).last_hidden_state # [1,77,768]
432
 
433
+ # 2.2) T5 → adapter → anchor, delta, sigma (77 tokens)
434
  t5_in = t5_tok(prompt,
435
+ max_length=77, padding="max_length",
436
  truncation=True, return_tensors="pt").to(DEVICE)
437
+ t5_seq = t5_mod(**t5_in).last_hidden_state # [1,77,512]
438
+ anchor, delta, sigma = adapter(t5_seq) # each [1,77,768]
439
 
440
  # 2.3) Upsample to 77 tokens
441
  T_clip = clip_cond.shape[1] # 77