FikriRiyadi committed on
Commit
a789c75
·
verified ·
1 Parent(s): 5683562

Create predict_utils.py

Browse files
Files changed (1) hide show
  1. predict_utils.py +33 -0
predict_utils.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from transformers import BertTokenizer
4
+ from model import HybridModel
5
+
6
# Names of the output labels. predict() pairs them positionally with the
# model's per-label scores and with the per-label thresholds, so the order
# here must match the order the model was trained with.
LABELS = [
    'HS',
    'Abusive',
    'HS_Individual',
    'HS_Group',
    'HS_Religion',
    'HS_Race',
    'HS_Physical',
    'HS_Gender',
    'HS_Other',
    'HS_Weak',
    'HS_Moderate',
    'HS_Strong',
]

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
+
10
def load_model_and_thresholds(
    model_path="best_model_dataScrap_final.pt",
    thresholds_path="optimal_thresholds_dataScrap_final.npy",
    tokenizer_name="indobenchmark/indobert-base-p1",
):
    """Load the classifier, its tokenizer and the decision thresholds.

    All arguments default to the artifacts this module was written for, so
    calling the function with no arguments behaves as before.

    Args:
        model_path: Path of the checkpoint holding the ``HybridModel``
            state dict.
        thresholds_path: Path of the ``.npy`` file holding the decision
            thresholds.
        tokenizer_name: Name or path passed to
            ``BertTokenizer.from_pretrained``.

    Returns:
        A ``(model, tokenizer, thresholds)`` tuple. The model has been moved
        to ``DEVICE`` and switched to evaluation mode; ``thresholds`` is
        whatever ``np.load`` returns for ``thresholds_path`` (presumably one
        value per label -- confirm against the saved file).
    """
    model = HybridModel()
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source. Consider passing weights_only=True if the
    # installed torch version supports it.
    state_dict = torch.load(model_path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.to(DEVICE)
    # Evaluation mode: disables training-only behaviour such as dropout.
    model.eval()

    tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
    thresholds = np.load(thresholds_path)
    return model, tokenizer, thresholds
19
+
20
def predict(text, model, tokenizer, thresholds, *, max_length=128):
    """Classify one text and report a yes/no decision for every label.

    Args:
        text: The input text handed to ``tokenizer``.
        model: Callable invoked as ``model(input_ids, attention_mask)``. Its
            output is assumed to hold one probability per entry of
            ``LABELS`` for a batch of size one -- confirm against the model
            definition.
        tokenizer: Callable tokenizer returning ``"input_ids"`` and
            ``"attention_mask"`` tensors.
        thresholds: Decision thresholds compared element-wise with the model
            output; a label is positive when its score is strictly greater
            than its threshold.
        max_length: Length the input is padded / truncated to. Defaults to
            128, the value that was previously hard-coded.

    Returns:
        A dict mapping each label name to a display string of the form
        ``"✅ Ya (0.87)"`` or ``"❌ Tidak (0.12)"``.

    Raises:
        ValueError: If the model output or the thresholds do not line up
            with ``LABELS``. Previously such a mismatch was silently
            truncated by ``zip`` or failed with an unrelated error.
    """
    encoding = tokenizer(
        text,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=max_length,
    )
    input_ids = encoding["input_ids"].to(DEVICE)
    attention_mask = encoding["attention_mask"].to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        probs = model(input_ids, attention_mask).squeeze(0).cpu().numpy()

    if probs.shape != (len(LABELS),):
        raise ValueError(
            f"expected {len(LABELS)} scores from the model, "
            f"got output of shape {probs.shape}"
        )

    # Convert the scores to binary decisions.
    preds_bin = (probs > thresholds).astype(int)
    if preds_bin.shape != probs.shape:
        raise ValueError(
            f"thresholds are not compatible with {len(LABELS)} labels "
            f"(comparison produced shape {preds_bin.shape})"
        )

    # Format the result for display.
    return {
        label: f"✅ Ya ({prob:.2f})" if pred else f"❌ Tidak ({prob:.2f})"
        for label, prob, pred in zip(LABELS, probs, preds_bin)
    }