oist committed
Commit 03493f1 · 1 Parent(s): 1f4121b

Fix model code

Files changed (1)
  1. modeling_blaser.py +22 -19
modeling_blaser.py CHANGED
@@ -36,6 +36,7 @@ class BlaserConfig(PretrainedConfig):
 # ---------------- CORE MODEL ---------------- #
 ACTIVATIONS = {"TANH": nn.Tanh, "RELU": nn.ReLU}
 
+
 class BlaserCore(nn.Module):
     def __init__(
         self,
@@ -80,14 +81,6 @@ class BlaserCore(nn.Module):
 
         self.mlp = nn.Sequential(*modules)
 
-    def forward(self, src: Tensor, mt: Tensor, ref: Optional[Tensor] = None) -> Tensor:
-        proc = self._featurize(
-            src=self._norm(src),
-            mt=self._norm(mt),
-            ref=self._norm(ref),
-        )
-        return self.mlp(proc)
-
     def _norm(self, emb: Optional[Tensor]) -> Optional[Tensor]:
         return F.normalize(emb) if (emb is not None and self.norm_emb) else emb
 
@@ -104,14 +97,13 @@ class BlaserCore(nn.Module):
 
 
 # ---------------- HF MODEL WRAPPER ---------------- #
-
 class BlaserModel(PreTrainedModel):
     config_class = BlaserConfig
 
     def __init__(self, config: BlaserConfig):
         super().__init__(config)
-        # Instead of self.core, assign directly
-        self.mlp = BlaserCore(
+        # Directly assign the Sequential MLP to self.mlp
+        core = BlaserCore(
             embedding_dim=config.embedding_dim,
             output_dim=config.output_dim,
             hidden_dims=config.hidden_dims,
@@ -120,14 +112,25 @@ class BlaserModel(PreTrainedModel):
             input_form=config.input_form,
             norm_emb=config.norm_emb,
             output_act=config.output_act,
-        ).mlp  # only take the Sequential MLP
+        )
+        self.mlp = core.mlp
+        self.input_form = core.input_form
+        self.norm_emb = core.norm_emb
 
     def forward(self, src, mt, ref=None):
-        # The old checkpoint expects the input feature processing inside BlaserCore
-        proc = BlaserCore._featurize(
-            self.mlp,  # pass self as `self` for static call
-            src=BlaserCore._norm(self.mlp, src),
-            mt=BlaserCore._norm(self.mlp, mt),
-            ref=BlaserCore._norm(self.mlp, ref)
-        )
+        # Use the same featurization as in BlaserCore
+        src = F.normalize(src) if self.norm_emb else src
+        mt = F.normalize(mt) if self.norm_emb else mt
+        ref = F.normalize(ref) if (ref is not None and self.norm_emb) else ref
+
+        if self.input_form == "COMET":
+            if ref is None:
+                raise ValueError("COMET input_form requires reference embedding")
+            proc = torch.cat(
+                [ref, mt, src * mt, ref * mt, torch.abs(mt - src), torch.abs(mt - ref)],
+                dim=-1,
+            )
+        else:  # QE
+            proc = torch.cat([src, mt, src * mt, torch.abs(mt - src)], dim=-1)
+
         return self.mlp(proc)
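
For reference, a minimal sketch of the featurization the new forward performs, assuming batched sentence embeddings. The value embedding_dim = 1024 is an illustrative assumption, not taken from the diff; in practice it comes from BlaserConfig. The sketch shows why the first nn.Linear of the checkpoint's mlp must expect 4 * embedding_dim inputs in QE form and 6 * embedding_dim in COMET form:

import torch

embedding_dim = 1024  # illustrative only; the real value comes from BlaserConfig
src = torch.randn(2, embedding_dim)  # source-sentence embeddings
mt = torch.randn(2, embedding_dim)   # machine-translation embeddings
ref = torch.randn(2, embedding_dim)  # reference-translation embeddings

# QE form: [src, mt, src*mt, |mt - src|] -> 4 * embedding_dim features
qe = torch.cat([src, mt, src * mt, torch.abs(mt - src)], dim=-1)
assert qe.shape == (2, 4 * embedding_dim)

# COMET form: six concatenated blocks -> 6 * embedding_dim features
comet = torch.cat(
    [ref, mt, src * mt, ref * mt, torch.abs(mt - src), torch.abs(mt - ref)],
    dim=-1,
)
assert comet.shape == (2, 6 * embedding_dim)

Because self.mlp = core.mlp preserves the same mlp.* parameter names as the old self.mlp = BlaserCore(...).mlp assignment, existing checkpoints load without any key remapping; only the featurization moves out of BlaserCore.forward.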
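A hedged usage sketch follows. The repo id is a placeholder, and this assumes the checkpoint's config registers BlaserModel under AutoModel via auto_map; loading custom modeling code from the Hub requires trust_remote_code=True:

from transformers import AutoModel

# "your-org/blaser-checkpoint" is a placeholder, not a real repo id
model = AutoModel.from_pretrained(
    "your-org/blaser-checkpoint", trust_remote_code=True
)
with torch.no_grad():
    score = model(src=src, mt=mt, ref=ref)  # shape: (batch, output_dim)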