akhauriyash committed
Commit 3658e2c · 1 Parent(s): ded7955
Files changed (2):
  1. config.json +4 -4
  2. modeling_llama_butler.py +6 -6
config.json CHANGED
@@ -1,14 +1,14 @@
 {
   "architectures": [
-    "modeling_llama_butler.LlamaButlerForCausalLM"
+    "LlamaButlerForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "attn_reduce_factor": 8,
   "auto_map": {
-    "AutoConfig": "modeling_llama_butler.LlamaButlerConfig",
-    "AutoModel": "modeling_llama_butler.LlamaButlerForCausalLM",
-    "AutoModelForCausalLM": "modeling_llama_butler.LlamaButlerForCausalLM"
+    "AutoConfig": "modeling_llama_butler:LlamaButlerConfig",
+    "AutoModel": "modeling_llama_butler:LlamaButlerForCausalLM",
+    "AutoModelForCausalLM": "modeling_llama_butler:LlamaButlerForCausalLM"
   },
   "bos_token_id": 128000,
   "dDash": 16,
modeling_llama_butler.py CHANGED
@@ -1266,12 +1266,12 @@ class LlamaAttentionExperimental(nn.Module):
         else:
             self.head_importances = torch.cat([self.head_importances, head_importances], dim=1)

-        if self.layer_idx == 31:
-            if q_len == 1:
-                self.dtok += 1
-                print(f"Primary Key-Value Shape: {past_key_value.predictor_primary_key[0].shape}, Importance: {past_key_value.predictor_importance_key[0].shape}, Tok-Decoded: {self.dtok}")
-            else:
-                self.dtok = 0
+        # if self.layer_idx == 31:
+        #     if q_len == 1:
+        #         self.dtok += 1
+        #         print(f"Primary Key-Value Shape: {past_key_value.predictor_primary_key[0].shape}, Importance: {past_key_value.predictor_importance_key[0].shape}, Tok-Decoded: {self.dtok}")
+        #     else:
+        #         self.dtok = 0

         if not output_attentions:
             attn_weights = None
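This hunk comments out the per-token debug print that fired on layer 31 during decoding. If that tracing is needed again, a common alternative is to keep it behind an environment flag and route it through logging instead of toggling comments; the sketch below follows that pattern, and the flag name and helper function are hypothetical, not part of this repo.

    # Generic pattern (not from this repo): gate per-step debug output behind an env flag.
    import logging
    import os

    logger = logging.getLogger("llama_butler.debug")
    DEBUG_KV = os.getenv("BUTLER_DEBUG_KV", "0") == "1"  # hypothetical flag name

    def log_kv_shapes(layer_idx, q_len, past_key_value, dtok):
        """Log predictor KV-cache shapes once per decoded token on the last layer."""
        if not DEBUG_KV or layer_idx != 31:
            return dtok
        if q_len == 1:
            dtok += 1
            logger.debug(
                "Primary Key-Value Shape: %s, Importance: %s, Tok-Decoded: %d",
                tuple(past_key_value.predictor_primary_key[0].shape),
                tuple(past_key_value.predictor_importance_key[0].shape),
                dtok,
            )
            return dtok
        return 0  # reset the decoded-token counter on prefill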