farkhanAdhitama committed on
Commit
d6934dd
·
1 Parent(s): 28ddb3f

update space

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import library yang diperlukan
2
+ import streamlit as st
3
+ import joblib
4
+ import re
5
+ import string
6
+ import nltk
7
+ from nltk.corpus import stopwords
8
+ from nltk.tokenize import word_tokenize
9
+ from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
10
+
11
@st.cache_resource
def _load_resources():
    """Download the NLTK data and load the model, vectorizer and stemmer once.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the downloads, the unpickling of both artifacts and the
    stemmer construction would be repeated on every rerun.

    Returns:
        A ``(model, vectorizer, stemmer)`` tuple.
    """
    # NLTK data needed by stopwords.words() and word_tokenize()
    for package in ("stopwords", "punkt", "punkt_tab"):
        nltk.download(package)

    # NOTE(review): joblib.load unpickles these files; only ship trusted artifacts.
    loaded_model = joblib.load("random_forest_model.pkl")
    loaded_vectorizer = joblib.load("tfidf_vectorizer.pkl")

    # Indonesian stemmer from Sastrawi
    indonesian_stemmer = StemmerFactory().create_stemmer()
    return loaded_model, loaded_vectorizer, indonesian_stemmer


# Module-level names used by the preprocessing and prediction functions
model, vectorizer, stemmer = _load_resources()
23
+
24
+
25
def delete_unused_char(text):
    """Strip mentions, hashtags, RT markers, links, digits and punctuation.

    Args:
        text: Raw text to clean.

    Returns:
        The text with the unwanted pieces removed, newlines turned into
        spaces, and leading/trailing space characters trimmed.
    """
    # Applied in order; each match is deleted outright.
    removal_patterns = (
        r"@[A-Za-z0-9]+",  # mentions
        r"#[A-Za-z0-9]+",  # hashtags
        r"RT[\s]",  # "RT" followed by one whitespace character
        r"http\S+",  # links
        r"[0-9]+",  # ASCII digits
        r"[^\w\s]",  # anything that is neither a word character nor whitespace
    )
    for pattern in removal_patterns:
        text = re.sub(pattern, "", text)

    single_line = text.replace("\n", " ")
    # Underscores count as word characters above, so this pass is what removes them.
    punctuation_table = str.maketrans("", "", string.punctuation)
    # Only the space character is trimmed; tabs at the edges are kept.
    return single_line.translate(punctuation_table).strip(" ")
39
+
40
+
41
def cleaned_text(text):
    """Normalize raw review text into a space-joined string of stemmed tokens.

    The text is passed through ``delete_unused_char``, lowercased, stripped of
    any remaining punctuation and digits, tokenized, filtered against the
    Indonesian stopword list, and stemmed with the Sastrawi stemmer.

    Args:
        text: Raw review text.

    Returns:
        The stemmed tokens joined by single spaces; an empty string when no
        token survives the filtering.
    """
    # Keep the helper's return value: strings are immutable, so calling it
    # without assigning the result (as before) left the text untouched.
    # It must run before lowercasing because its "RT" pattern is case-sensitive.
    text = delete_unused_char(text)
    # 1. Lowercasing
    text = text.lower()
    # 2. Remove punctuation
    text = text.translate(str.maketrans("", "", string.punctuation))
    # 3. Remove numbers (\d also covers non-ASCII digits)
    text = re.sub(r"\d+", "", text)
    # 4. Tokenization
    tokens = word_tokenize(text)
    # 5. Drop Indonesian stopwords, then
    # 6. reduce each remaining word to its root form with the Sastrawi stemmer
    stop_words = set(stopwords.words("indonesian"))
    stemmed = [stemmer.stem(token) for token in tokens if token not in stop_words]
    return " ".join(stemmed)
58
+
59
+
60
def predict_sentiment(text):
    """Predict the sentiment label for a single review.

    Args:
        text: Raw review text.

    Returns:
        The first (and only) element of the model's prediction for the
        preprocessed, vectorized text.
    """
    # Preprocess, then vectorize as a one-document batch
    features = vectorizer.transform([cleaned_text(text)])
    return model.predict(features)[0]
66
+
67
+
68
# Streamlit UI
st.title("Analisis Sentimen Review BRI Mobile 💳")
st.write("Masukkan review dan dapatkan prediksi sentimen (Positif, Negatif, Netral)")

# Review text entered by the user
user_input = st.text_area("Masukkan review di sini:")

if st.button("Prediksi Sentimen"):
    if not user_input.strip():
        # Only whitespace was entered: ask for text instead of predicting
        st.warning("Silakan masukkan teks terlebih dahulu!")
    else:
        sentiment = predict_sentiment(user_input)
        st.success(f"Prediksi Sentimen: **{sentiment}**")

st.write("Dibuat dengan 💖 oleh Muhammad Farkhan Adhitama")
notebook/sentimen_analysis_brimo.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
random_forest_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a6a774ffd94d3a66550a24d02862caa27e85c711258d07a127baf00f158833
3
+ size 65195937
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ scikit-learn
3
+ joblib
4
+ nltk
5
+ numpy
6
+ pandas
7
+ Sastrawi
tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65eabfb11ec6e48408db1619a37d01719ac5781385d4720582e451b6f87aa2d4
3
+ size 8008