Commit 0ef0739
1 Parent(s): a731bb0

Update modeling_baichuan.py

modeling_baichuan.py CHANGED (+7 -6)
@@ -35,6 +35,7 @@ def _fill_with_neg_inf(t):
     return t.float().fill_(float("-inf")).type_as(t)
 
 def _gen_alibi_mask(n_head, max_pos):
+    """used in inference only"""
     slopes = torch.Tensor(_get_interleave(n_head))
     alibi = slopes.unsqueeze(1).unsqueeze(1) * torch.arange(max_pos).unsqueeze(0).unsqueeze(0).expand(
         n_head, -1, -1)
@@ -46,7 +47,7 @@ def _gen_alibi_mask(n_head, max_pos):
     return alibi_mask
 
 def _buffered_future_mask(tensor, maxpos, alibi, attn_heads):
-    """
+    """used in training only"""
     dim = tensor.size(1)
     _future_mask = torch.triu(
         _fill_with_neg_inf(torch.zeros([maxpos, maxpos])), 1
@@ -235,7 +236,6 @@ class BaichuanPreTrainedModel(PreTrainedModel):
         module.gradient_checkpointing = value
 
 
-
 class BaichuanModel(BaichuanPreTrainedModel):
     def __init__(self, config: BaichuanConfig):
         super().__init__(config)
@@ -288,7 +288,6 @@ class BaichuanModel(BaichuanPreTrainedModel):
         return_dict: Optional[bool] = True,
     ) -> Union[Tuple, BaseModelOutputWithPast]:
 
-
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot provide both input_ids and inputs_embeds simultaneously")
         elif input_ids is not None:
@@ -298,6 +297,8 @@ class BaichuanModel(BaichuanPreTrainedModel):
         else:
             raise ValueError("You need to provide input_ids or inputs_embeds")
 
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
         seq_length_with_past = seq_length
 
         if past_key_values is not None:
@@ -440,7 +441,8 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         **kwargs
     ) -> Union[Tuple, CausalLMOutputWithPast]:
 
-
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
         # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
         outputs = self.model(
             input_ids=input_ids,
@@ -514,7 +516,6 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
             for layer_past in past_key_values
         )
 
-
     def quantize(self, bits: int):
        try:
            from .quantizer import QLinear
@@ -603,4 +604,4 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         self.__class__.generate = PreTrainedModel.generate  # disable stream
         outputs = self.generate(input_ids, generation_config=generation_config)
         response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
-        return response
+        return response
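The two new docstrings record a real split in the code: _gen_alibi_mask precomputes the full static ALiBi-plus-causal mask for inference, while _buffered_future_mask rebuilds a mask sized to the current tensor during training. For readers unfamiliar with the pattern, below is a minimal, self-contained sketch of the construction. get_slopes here is a simplified stand-in for the model's _get_interleave and is exact only for power-of-two head counts (the real helper interpolates for other counts); everything else mirrors the context lines visible in the diff.

import torch

def get_slopes(n_head):
    # Simplified stand-in for _get_interleave: geometric per-head slopes,
    # exact when n_head is a power of two.
    start = 2 ** (-8.0 / n_head)
    return [start ** (i + 1) for i in range(n_head)]

def gen_alibi_mask(n_head, max_pos):
    # Same shape gymnastics as _gen_alibi_mask in the diff: each head gets
    # a linear bias over key positions, shape (n_head, 1, max_pos).
    slopes = torch.tensor(get_slopes(n_head))
    alibi = slopes.unsqueeze(1).unsqueeze(1) * torch.arange(max_pos).unsqueeze(0).unsqueeze(0).expand(n_head, -1, -1)
    # Add a causal mask: -inf strictly above the diagonal blocks attention
    # to future positions. Broadcasts to (n_head, max_pos, max_pos).
    future = torch.triu(torch.full((max_pos, max_pos), float("-inf")), 1)
    return alibi + future

mask = gen_alibi_mask(n_head=4, max_pos=8)
print(mask.shape)  # torch.Size([4, 8, 8])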
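The substantive fix is the pair of added return_dict lines: both BaichuanModel.forward and BaichuanForCausalLM.forward now fall back to config.use_return_dict when the caller passes return_dict=None, which is the usual transformers convention; the remaining hunks only drop stray blank lines and replace a dangling docstring opener. A minimal sketch of the pattern, with SimpleConfig as a hypothetical stand-in for BaichuanConfig:

from dataclasses import dataclass
from typing import Optional

@dataclass
class SimpleConfig:
    # Hypothetical stand-in for the config's use_return_dict setting.
    use_return_dict: bool = True

def forward(return_dict: Optional[bool] = None, config: Optional[SimpleConfig] = None):
    config = config or SimpleConfig()
    # The line added by this commit: an explicit caller choice wins;
    # None means "defer to the model config".
    return_dict = return_dict if return_dict is not None else config.use_return_dict
    return "BaseModelOutputWithPast" if return_dict else "plain tuple"

print(forward())                   # BaseModelOutputWithPast (config default)
print(forward(return_dict=False))  # plain tuple (explicit override)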