# api-blue-voice/src/inference.py
import re
import string

import joblib
import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model

# word_tokenize and stopwords rely on NLTK data packages; fetch them once if
# they are missing (a no-op when they are already installed)
nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)
# Text preprocessing helpers
def cleaning_text(text):
    text = re.sub(r'@[A-Za-z0-9]+', '', text)  # remove mentions
    text = re.sub(r'#[A-Za-z0-9]+', '', text)  # remove hashtags
    text = re.sub(r'RT[\s]', '', text)  # remove retweet markers ("RT ")
    text = re.sub(r'http\S+', '', text)  # remove links
    text = re.sub(r'[0-9]+', '', text)  # remove digits
    text = re.sub(r'[^\w\s]', '', text)  # remove everything except word characters and whitespace
    text = text.replace('\n', ' ')  # replace newlines with spaces
    text = text.translate(str.maketrans('', '', string.punctuation))  # strip any remaining punctuation
    text = text.strip(' ')  # trim leading and trailing spaces
    return text
def casefolding_text(text):  # Convert all characters in the text to lowercase
    text = text.lower()
    return text
def tokenizing_text(text):  # Split the text into a list of tokens
    text = word_tokenize(text)
    return text
def filtering_text(text):  # Remove English stopwords from the token list
    list_stopwords = set(stopwords.words('english'))
    filtered = [token for token in text if token not in list_stopwords]
    return filtered
def to_sentence(list_words):  # Join a list of words back into a single sentence
    return ' '.join(list_words)
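
# Illustrative walk-through of the pipeline above (the sample tweet is an
# assumption, not taken from the project's data):
#   raw text  : "RT @user: The beach was AMAZING!! https://t.co/x #bali"
#   cleaning  -> "The beach was AMAZING"
#   casefold  -> "the beach was amazing"
#   tokenize  -> ['the', 'beach', 'was', 'amazing']
#   filtering -> ['beach', 'amazing']
#   sentence  -> "beach amazing"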
# Load the per-aspect models: one TF-IDF vectorizer and one neural-network
# classifier for each aspect
tfidf_attractions = joblib.load("models/tfidf_attractions.pkl")
tfidf_amenities = joblib.load("models/tfidf_amenities.pkl")
tfidf_access = joblib.load("models/tfidf_access.pkl")
tfidf_price = joblib.load("models/tfidf_price.pkl")
tfidf_no_aspect = joblib.load("models/tfidf_no_aspect.pkl")
model_attractions = load_model("models/nn_attractions.h5")
model_amenities = load_model("models/nn_amenities.h5")
model_access = load_model("models/nn_access.h5")
model_price = load_model("models/nn_price.h5")
model_no_aspect = load_model("models/nn_no_aspect.h5")
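
# Illustrative grouping (an assumption added for this sketch; the function
# below keeps the original per-aspect calls and does not use this dict):
# pairing each aspect with its vectorizer and classifier makes it possible to
# loop over aspects instead of repeating five near-identical blocks.
ASPECT_MODELS = {
    "attractions": (tfidf_attractions, model_attractions),
    "amenities": (tfidf_amenities, model_amenities),
    "access": (tfidf_access, model_access),
    "price": (tfidf_price, model_price),
    "no_aspect": (tfidf_no_aspect, model_no_aspect),
}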
# Inference function
def predict_sentiment(text):
    # Preprocessing
    text = cleaning_text(text)
    text = casefolding_text(text)
    text = tokenizing_text(text)
    text = filtering_text(text)
    text = to_sentence(text)

    # Transform the text into TF-IDF features, one vectorizer per aspect
    tfidf_attractions_text = tfidf_attractions.transform([text])
    tfidf_amenities_text = tfidf_amenities.transform([text])
    tfidf_access_text = tfidf_access.transform([text])
    tfidf_price_text = tfidf_price.transform([text])
    tfidf_no_aspect_text = tfidf_no_aspect.transform([text])

    # Predict class probabilities for each aspect
    prediction_attractions = model_attractions.predict(tfidf_attractions_text, verbose=0)
    prediction_amenities = model_amenities.predict(tfidf_amenities_text, verbose=0)
    prediction_access = model_access.predict(tfidf_access_text, verbose=0)
    prediction_price = model_price.predict(tfidf_price_text, verbose=0)
    prediction_no_aspect = model_no_aspect.predict(tfidf_no_aspect_text, verbose=0)

    # Take the most probable class per aspect
    sentiment_attractions = np.argmax(prediction_attractions, axis=1)[0]
    sentiment_amenities = np.argmax(prediction_amenities, axis=1)[0]
    sentiment_access = np.argmax(prediction_access, axis=1)[0]
    sentiment_price = np.argmax(prediction_price, axis=1)[0]
    sentiment_no_aspect = np.argmax(prediction_no_aspect, axis=1)[0]

    # Map class indices to human-readable labels
    label_map = {
        0: "negative",
        1: "neutral",
        2: "positive",
        3: "none"
    }
    sentiment_attractions = label_map[sentiment_attractions]
    sentiment_amenities = label_map[sentiment_amenities]
    sentiment_access = label_map[sentiment_access]
    sentiment_price = label_map[sentiment_price]
    sentiment_no_aspect = label_map[sentiment_no_aspect]

    return {
        "attractions": sentiment_attractions,
        "amenities": sentiment_amenities,
        "access": sentiment_access,
        "price": sentiment_price,
        "no_aspect": sentiment_no_aspect
    }
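
# Minimal usage sketch (the __main__ guard and the sample review are
# assumptions for illustration; the actual labels depend on the trained models):
if __name__ == "__main__":
    sample_review = "The waterfall was beautiful but the ticket price is too expensive"
    print(predict_sentiment(sample_review))
    # Expected output shape:
    # {"attractions": "...", "amenities": "...", "access": "...",
    #  "price": "...", "no_aspect": "..."}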