Update README.md
Browse files
README.md
CHANGED
@@ -68,6 +68,9 @@ message_template = tokenizer.apply_chat_template(message, tokenize=False)
|
|
68 |
kwargs = {"padding": 'longest', "truncation": True, "return_tensors": "pt"}
|
69 |
tokens = tokenizer.encode_plus(message_template, **kwargs)
|
70 |
|
|
|
|
|
|
|
71 |
with torch.no_grad():
|
72 |
reward_tensor = reward_model(tokens["input_ids"][0].view(1,-1).to(device), attention_mask=tokens["attention_mask"][0].view(1,-1).to(device))[0]
|
73 |
reward = reward_tensor.cpu().detach().item()
|
|
|
68 |
kwargs = {"padding": 'longest', "truncation": True, "return_tensors": "pt"}
|
69 |
tokens = tokenizer.encode_plus(message_template, **kwargs)
|
70 |
|
71 |
+
# Note: encode_plus may prepend an extra bos token; this has no impact on the final performance, but you can avoid it by using the following code instead:
|
72 |
+
# tokens = tokenizer.apply_chat_template(message, tokenize=True, return_dict=True, **kwargs)
|
73 |
+
|
74 |
with torch.no_grad():
|
75 |
reward_tensor = reward_model(tokens["input_ids"][0].view(1,-1).to(device), attention_mask=tokens["attention_mask"][0].view(1,-1).to(device))[0]
|
76 |
reward = reward_tensor.cpu().detach().item()
|