# api-blue-voice/src/inference.py
import re
import string

import joblib
import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model

# word_tokenize and stopwords rely on NLTK data packages; fetch them once if
# they are missing (a no-op when they are already installed)
nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)
# Text preprocessing helpers
def cleaning_text(text):
    text = re.sub(r'@[A-Za-z0-9]+', '', text)  # remove mentions
    text = re.sub(r'#[A-Za-z0-9]+', '', text)  # remove hashtags
    text = re.sub(r'RT[\s]', '', text)  # remove retweet markers ("RT ")
    text = re.sub(r'http\S+', '', text)  # remove links
    text = re.sub(r'[0-9]+', '', text)  # remove digits
    text = re.sub(r'[^\w\s]', '', text)  # remove everything except word characters and whitespace
    text = text.replace('\n', ' ')  # replace newlines with spaces
    text = text.translate(str.maketrans('', '', string.punctuation))  # strip any remaining punctuation
    text = text.strip(' ')  # trim leading and trailing spaces
    return text
def casefolding_text(text):  # Convert all characters in the text to lowercase
    text = text.lower()
    return text
def tokenizing_text(text):  # Split the text into a list of tokens
    text = word_tokenize(text)
    return text
def filtering_text(text):  # Remove English stopwords from the token list
    list_stopwords = set(stopwords.words('english'))
    filtered = [token for token in text if token not in list_stopwords]
    return filtered
def to_sentence(list_words):  # Join a list of words back into a single sentence
    return ' '.join(list_words)
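
# Illustrative walk-through of the pipeline above (the sample tweet is an
# assumption, not taken from the project's data):
#   raw text  : "RT @user: The beach was AMAZING!! https://t.co/x #bali"
#   cleaning  -> "The beach was AMAZING"
#   casefold  -> "the beach was amazing"
#   tokenize  -> ['the', 'beach', 'was', 'amazing']
#   filtering -> ['beach', 'amazing']
#   sentence  -> "beach amazing"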
# Load the per-aspect models: one TF-IDF vectorizer and one neural-network
# classifier for each aspect
tfidf_attractions = joblib.load("models/tfidf_attractions.pkl")
tfidf_amenities = joblib.load("models/tfidf_amenities.pkl")
tfidf_access = joblib.load("models/tfidf_access.pkl")
tfidf_price = joblib.load("models/tfidf_price.pkl")
tfidf_no_aspect = joblib.load("models/tfidf_no_aspect.pkl")
model_attractions = load_model("models/nn_attractions.h5")
model_amenities = load_model("models/nn_amenities.h5")
model_access = load_model("models/nn_access.h5")
model_price = load_model("models/nn_price.h5")
model_no_aspect = load_model("models/nn_no_aspect.h5")
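
# Illustrative grouping (an assumption added for this sketch; the function
# below keeps the original per-aspect calls and does not use this dict):
# pairing each aspect with its vectorizer and classifier makes it possible to
# loop over aspects instead of repeating five near-identical blocks.
ASPECT_MODELS = {
    "attractions": (tfidf_attractions, model_attractions),
    "amenities": (tfidf_amenities, model_amenities),
    "access": (tfidf_access, model_access),
    "price": (tfidf_price, model_price),
    "no_aspect": (tfidf_no_aspect, model_no_aspect),
}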
# Inference function
def predict_sentiment(text):
    # Preprocessing
    text = cleaning_text(text)
    text = casefolding_text(text)
    text = tokenizing_text(text)
    text = filtering_text(text)
    text = to_sentence(text)

    # Transform the text into TF-IDF features, one vectorizer per aspect
    tfidf_attractions_text = tfidf_attractions.transform([text])
    tfidf_amenities_text = tfidf_amenities.transform([text])
    tfidf_access_text = tfidf_access.transform([text])
    tfidf_price_text = tfidf_price.transform([text])
    tfidf_no_aspect_text = tfidf_no_aspect.transform([text])

    # Predict class probabilities for each aspect
    prediction_attractions = model_attractions.predict(tfidf_attractions_text, verbose=0)
    prediction_amenities = model_amenities.predict(tfidf_amenities_text, verbose=0)
    prediction_access = model_access.predict(tfidf_access_text, verbose=0)
    prediction_price = model_price.predict(tfidf_price_text, verbose=0)
    prediction_no_aspect = model_no_aspect.predict(tfidf_no_aspect_text, verbose=0)

    # Take the most probable class per aspect
    sentiment_attractions = np.argmax(prediction_attractions, axis=1)[0]
    sentiment_amenities = np.argmax(prediction_amenities, axis=1)[0]
    sentiment_access = np.argmax(prediction_access, axis=1)[0]
    sentiment_price = np.argmax(prediction_price, axis=1)[0]
    sentiment_no_aspect = np.argmax(prediction_no_aspect, axis=1)[0]

    # Map class indices to human-readable labels
    label_map = {
        0: "negative",
        1: "neutral",
        2: "positive",
        3: "none"
    }
    sentiment_attractions = label_map[sentiment_attractions]
    sentiment_amenities = label_map[sentiment_amenities]
    sentiment_access = label_map[sentiment_access]
    sentiment_price = label_map[sentiment_price]
    sentiment_no_aspect = label_map[sentiment_no_aspect]

    return {
        "attractions": sentiment_attractions,
        "amenities": sentiment_amenities,
        "access": sentiment_access,
        "price": sentiment_price,
        "no_aspect": sentiment_no_aspect
    }
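
# Minimal usage sketch (the __main__ guard and the sample review are
# assumptions for illustration; the actual labels depend on the trained models):
if __name__ == "__main__":
    sample_review = "The waterfall was beautiful but the ticket price is too expensive"
    print(predict_sentiment(sample_review))
    # Expected output shape:
    # {"attractions": "...", "amenities": "...", "access": "...",
    #  "price": "...", "no_aspect": "..."}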