import torch
import numpy as np
from transformers import BertTokenizer
from model import HybridModel

# Multi-label targets: overall hate-speech/abusive flags plus HS target, topic, and intensity labels.
LABELS = ['HS', 'Abusive', 'HS_Individual', 'HS_Group', 'HS_Religion', 'HS_Race',
          'HS_Physical', 'HS_Gender', 'HS_Other', 'HS_Weak', 'HS_Moderate', 'HS_Strong']

# Run on GPU when available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_model_and_thresholds():
    # Load the trained hybrid model weights, move the model to the target device,
    # and switch to inference mode.
    model = HybridModel()
    model.load_state_dict(torch.load("best_model_dataScrap_final.pt", map_location=DEVICE))
    model.to(DEVICE)
    model.eval()

    # Tokenizer matching the IndoBERT backbone, plus the saved per-label decision thresholds.
    tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-base-p1")
    thresholds = np.load("optimal_thresholds_dataScrap_final.npy")
    return model, tokenizer, thresholds

def predict(text, model, tokenizer, thresholds):
    # Tokenize the input text and move the tensors to the same device as the model.
    encoding = tokenizer(text, return_tensors='pt', padding='max_length', truncation=True, max_length=128)
    input_ids = encoding["input_ids"].to(DEVICE)
    attention_mask = encoding["attention_mask"].to(DEVICE)

    # Forward pass without gradient tracking; the model is assumed to output one
    # probability per label for the single input sentence.
    with torch.no_grad():
        probs = model(input_ids, attention_mask).squeeze(0).cpu().numpy()

    # Apply the per-label thresholds so the caller gets both the raw probability
    # and the thresholded binary decision for each label.
    preds = (probs >= thresholds).astype(int)
    return {label: {"probability": float(prob), "predicted": int(pred)}
            for label, prob, pred in zip(LABELS, probs, preds)}
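

# --- Example usage (illustrative sketch) ---
# This entry point is not part of the original script; it only shows how the two
# functions above fit together. The sample sentence is hypothetical, and the
# weight/threshold files referenced above must exist locally for it to run.
if __name__ == "__main__":
    model, tokenizer, thresholds = load_model_and_thresholds()

    sample_text = "contoh kalimat untuk diuji"  # hypothetical Indonesian example sentence
    result = predict(sample_text, model, tokenizer, thresholds)

    # Print the probability and the thresholded decision for every label.
    for label, scores in result.items():
        print(f"{label}: prob={scores['probability']:.3f} pred={scores['predicted']}")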