"""Inference helpers: load the trained HybridModel and score a text per label."""

import numpy as np
import torch
from transformers import BertTokenizer

from model import HybridModel

# Output label names. `predict` pairs them positionally with the model's
# output values, so the order here must match the model's output order.
LABELS = [
    'HS',
    'Abusive',
    'HS_Individual',
    'HS_Group',
    'HS_Religion',
    'HS_Race',
    'HS_Physical',
    'HS_Gender',
    'HS_Other',
    'HS_Weak',
    'HS_Moderate',
    'HS_Strong',
]

# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_model_and_thresholds():
    """Load the model weights, the tokenizer and the saved thresholds.

    The checkpoint and threshold files are referenced by bare file name, so
    they are resolved relative to the current working directory.

    Returns:
        A ``(model, tokenizer, thresholds)`` tuple: the ``HybridModel`` placed
        on ``DEVICE`` and switched to evaluation mode, the ``BertTokenizer``
        for ``indobenchmark/indobert-base-p1``, and the array read from
        ``optimal_thresholds_dataScrap_final.npy``.
    """
    model = HybridModel()
    # NOTE(security): torch.load unpickles the checkpoint file; only load
    # checkpoints that come from a trusted source.
    state_dict = torch.load("best_model_dataScrap_final.pt", map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.to(DEVICE)
    model.eval()

    tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-base-p1")
    thresholds = np.load("optimal_thresholds_dataScrap_final.npy")
    return model, tokenizer, thresholds


def predict(text, model, tokenizer, thresholds):
    """Score ``text`` with ``model`` and return one value per label.

    Args:
        text: Input passed to ``tokenizer``; presumably a single string, since
            the leading (batch) dimension of the output is squeezed away.
        model: Callable invoked as ``model(input_ids, attention_mask)``.
        tokenizer: Callable returning a mapping with ``"input_ids"`` and
            ``"attention_mask"`` tensors.
        thresholds: Accepted for interface compatibility but not used here;
            the raw scores are returned and no thresholding is applied.

    Returns:
        A dict mapping each name in ``LABELS`` to the corresponding model
        output as a Python ``float``.
    """
    # Pad/truncate every input to exactly 128 tokens.
    encoding = tokenizer(
        text,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=128,
    )
    input_ids = encoding["input_ids"].to(DEVICE)
    attention_mask = encoding["attention_mask"].to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(input_ids, attention_mask)

    # Drop the leading dimension (when it has size 1) and move to NumPy.
    probs = output.squeeze(0).cpu().numpy()

    # zip stops at the shorter sequence, so a mismatch between the number of
    # labels and the number of model outputs is silently truncated.
    return {label: float(prob) for label, prob in zip(LABELS, probs)}