Spaces:

FikriRiyadi
/

cyberbullying-detection-indonesia

Running

cyberbullying-detection-indonesia / predict_utils.py

Update predict_utils.py

09ca6a5 verified 9 days ago

1.16 kB

	import torch
	import numpy as np
	from transformers import BertTokenizer
	from model import HybridModel

	# Output classes. predict() pairs these positionally with the model's
	# scores, so the order here has to match the order the model emits them in.
	LABELS = [
	    "HS",
	    "Abusive",
	    "HS_Individual",
	    "HS_Group",
	    "HS_Religion",
	    "HS_Race",
	    "HS_Physical",
	    "HS_Gender",
	    "HS_Other",
	    "HS_Weak",
	    "HS_Moderate",
	    "HS_Strong",
	]

	# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
	DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	def load_model_and_thresholds():
	    """Load everything needed for inference.

	    Instantiates ``HybridModel``, restores its weights from
	    ``best_model_dataScrap_final.pt``, moves it to ``DEVICE`` and calls
	    ``eval()`` on it. Then loads the IndoBERT tokenizer and the thresholds
	    array stored in ``optimal_thresholds_dataScrap_final.npy``.

	    Both file names are relative paths, so they are resolved against the
	    current working directory of the process.

	    Returns:
	        A ``(model, tokenizer, thresholds)`` tuple, in that order.
	    """
	    net = HybridModel()

	    # NOTE(review): torch.load unpickles the checkpoint; only point this at
	    # files from a trusted source.
	    weights = torch.load("best_model_dataScrap_final.pt", map_location=DEVICE)
	    net.load_state_dict(weights)
	    net.to(DEVICE)
	    net.eval()

	    bert_tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-base-p1")

	    # Presumably one threshold per entry of LABELS -- TODO confirm against
	    # the script that produced the .npy file.
	    cutoffs = np.load("optimal_thresholds_dataScrap_final.npy")

	    return net, bert_tokenizer, cutoffs

	def predict(text, model, tokenizer, thresholds):
	    """Score ``text`` against every entry of ``LABELS``.

	    Args:
	        text: Input handed directly to ``tokenizer``.
	        model: Callable invoked as ``model(input_ids, attention_mask)``.
	        tokenizer: Callable returning a mapping that contains
	            ``"input_ids"`` and ``"attention_mask"`` tensors.
	        thresholds: Accepted as part of the signature but not read by this
	            function; the raw scores are returned without thresholding.

	    Returns:
	        A dict mapping each label to a Python ``float`` score. Labels and
	        scores are paired positionally; ``zip`` stops at the shorter of
	        the two sequences.
	    """
	    # Pad or truncate to exactly 128 tokens.
	    tokens = tokenizer(
	        text,
	        max_length=128,
	        truncation=True,
	        padding="max_length",
	        return_tensors="pt",
	    )
	    ids = tokens["input_ids"].to(DEVICE)
	    mask = tokens["attention_mask"].to(DEVICE)

	    # Inference only: skip autograd bookkeeping for the forward pass and
	    # the tensor post-processing that follows it.
	    with torch.no_grad():
	        output = model(ids, mask)
	        # Drop the leading dimension when it has size 1, then hand the
	        # result to NumPy on the host.
	        # NOTE(review): assumes the model already returns probabilities;
	        # no sigmoid or softmax is applied here -- confirm against HybridModel.
	        scores = output.squeeze(0).cpu().numpy()

	    return dict(zip(LABELS, map(float, scores)))