Spaces (status: Configuration error)
Upload 5 files
- Procfile +1 -0
- README.md +2 -12
- app.py +234 -0
- helper.py +115 -0
- requirements.txt +14 -0
Procfile
ADDED
@@ -0,0 +1 @@
+web: python app.py
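Note on the Procfile: `web: python app.py` only works if app.py starts a server when executed directly, but the 234-line app.py added below never calls `app.run(...)`, so `python app.py` would import the module and exit (with `sdk: docker`, whatever the Dockerfile runs is what actually matters, and no Dockerfile is part of this upload). If that command really is meant to be the entry point, a typical guard to append at the bottom of app.py would look like the sketch below; 7860 is the port Hugging Face Spaces expect by default, and the `PORT` fallback is an assumption, not something the upload defines. Alternatively, since gunicorn is already listed in requirements.txt, `web: gunicorn app:app -b 0.0.0.0:7860` avoids the problem entirely.

    # Hypothetical entry point for the bottom of app.py; not part of the upload above.
    if __name__ == '__main__':
        app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))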
README.md
CHANGED
@@ -1,12 +1,2 @@
----
-
-emoji: 👀
-colorFrom: purple
-colorTo: yellow
-sdk: docker
-pinned: false
-license: apache-2.0
-short_description: keluh-cerdas app
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Keluh-Cerdas
+Keluh Cerdas Web
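A note on the "Configuration error" status shown at the top of this page: Hugging Face Spaces read their runtime configuration (sdk, title, emoji, license, and so on) from the YAML front matter at the top of README.md, and this commit deletes that whole block, leaving only a plain Markdown heading. Restoring a front-matter section like the one removed above (at minimum an `sdk: docker` line between two `---` fences, assuming the Space is still meant to run as a Docker Space) should clear the error; the configuration reference linked in the removed lines documents the accepted keys.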
app.py
ADDED
@@ -0,0 +1,234 @@
+from flask import Flask, render_template, request, redirect, url_for, flash
+from datetime import date, datetime
+import os, json
+import numpy as np
+import pandas as pd
+from wordcloud import WordCloud
+from helper import predict_emotion, keyword, load_tflite_model
+
+app = Flask(__name__)
+
+@app.route('/')
+@app.route('/dashboard')
+def dashboard():
+    base_path = os.path.join('data')
+    dash_df = pd.read_excel(os.path.join(base_path, 'dataset_dash.xlsx'))
+    emosi_df = pd.read_excel(os.path.join(base_path, 'final_dataset.xlsx'))
+
+    # Key figures
+    total_keluhan = dash_df.shape[0]
+    topik_terbanyak = dash_df['Topik'].value_counts().idxmax()
+    instansi_terbanyak = dash_df['Instansi'].value_counts().idxmax()
+
+    # Date handling
+    dash_df['tanggal_keluhan'] = pd.to_datetime(dash_df['tanggal_keluhan']).dt.normalize()
+    today = pd.to_datetime(datetime.today().date())
+    keluhan_hari_ini = dash_df[dash_df['tanggal_keluhan'] == today].shape[0]
+
+    # Daily complaints (last 7 days)
+    start_date = today - pd.Timedelta(days=6)
+    last_7_days_df = dash_df[(dash_df['tanggal_keluhan'] >= start_date) & (dash_df['tanggal_keluhan'] <= today)].copy()
+
+    last_7_days_df['nama_hari'] = last_7_days_df['tanggal_keluhan'].dt.day_name()
+    hari_en_to_id = {
+        'Monday': 'Senin',
+        'Tuesday': 'Selasa',
+        'Wednesday': 'Rabu',
+        'Thursday': 'Kamis',
+        'Friday': 'Jumat',
+        'Saturday': 'Sabtu',
+        'Sunday': 'Minggu'
+    }
+    last_7_days_df['nama_hari'] = last_7_days_df['nama_hari'].map(hari_en_to_id)
+    hari_urut = ['Senin', 'Selasa', 'Rabu', 'Kamis', 'Jumat', 'Sabtu', 'Minggu']
+    keluhan_per_hari = last_7_days_df.groupby('nama_hari').size().reindex(hari_urut, fill_value=0)
+    keluhan_harian_labels = keluhan_per_hari.index.tolist()
+    keluhan_harian_values = keluhan_per_hari.values.tolist()
+
+    # Emotion distribution
+    emosi_dist = emosi_df['emosi'].value_counts()
+    emosi_values = emosi_dist.values.tolist()
+
+    # Monthly complaint counts, sorted by date
+    # Add year/month columns so the data can be grouped per month
+    dash_df['year'] = dash_df['tanggal_keluhan'].dt.year
+    dash_df['month'] = dash_df['tanggal_keluhan'].dt.month
+
+    # Group by year and month
+    keluhan_bulanan = dash_df.groupby(['year', 'month']).size().reset_index()
+    keluhan_bulanan.columns = ['year', 'month', 'count']
+
+    # Sort by year and month
+    keluhan_bulanan = keluhan_bulanan.sort_values(['year', 'month'])
+
+    # Format month-year labels for display
+    import calendar
+    keluhan_bulanan['bulan_nama'] = keluhan_bulanan.apply(
+        lambda row: f"{calendar.month_abbr[row['month']]} {row['year']}",
+        axis=1
+    )
+    keluhan_bulanan_labels = keluhan_bulanan['bulan_nama'].tolist()
+    keluhan_bulanan_values = keluhan_bulanan['count'].tolist()
+
+    # ----- Top 5 Topik & Instansi ---------------------------------------
+    top_topik = dash_df['Topik'].value_counts().head(5).reset_index()
+    top_topik.columns = ['Topik', 'Jumlah']
+
+    top_instansi = dash_df['Instansi'].value_counts().head(5).reset_index()
+    top_instansi.columns = ['Instansi', 'Jumlah']
+
+    # ----- Word-cloud ----------------------------------------------------
+    text_wc = ' '.join(dash_df['keluhan'].dropna().astype(str))
+    wc_img = WordCloud(width=800, height=400, background_color="white").generate(text_wc)
+
+    wc_path = os.path.join('static', 'wordcloud.png')
+    os.makedirs(os.path.dirname(wc_path), exist_ok=True)
+    wc_img.to_file(wc_path)  # save to static/wordcloud.png
+
+    return render_template(
+        'dashboard.html',
+        total_keluhan=total_keluhan,
+        topik_terbanyak=topik_terbanyak,
+        instansi_terbanyak=instansi_terbanyak,
+        keluhan_hari_ini=keluhan_hari_ini,
+        keluhan_harian_labels=keluhan_harian_labels,
+        keluhan_harian_values=keluhan_harian_values,
+        emosi_labels=emosi_dist.index.tolist(),
+        emosi_values=emosi_values,
+        keluhan_bulanan_labels=keluhan_bulanan_labels,
+        keluhan_bulanan_values=keluhan_bulanan_values,
+        wordcloud_image='wordcloud.png',
+        top_topik=top_topik.itertuples(index=False),
+        top_instansi=top_instansi.itertuples(index=False),
+    )
+
+@app.route('/ubah_status', methods=['POST'])
+def ubah_status():
+    keluhan_id = int(request.form['id'])
+
+    base_path = os.path.join('data')
+    file_path = os.path.join(base_path, 'vikor_fix.xlsx')
+    df = pd.read_excel(file_path)
+
+    # Update the status to 'selesai' (resolved)
+    df.loc[df['id'] == keluhan_id, 'status'] = 'selesai'
+
+    # Write the file back
+    df.to_excel(file_path, index=False)
+
+    return redirect(url_for('leaderboard'))
+
+
+@app.route('/leaderboard')
+def leaderboard():
+    base_path = os.path.join('data')
+    final_df = pd.read_excel(os.path.join(base_path, 'vikor_fix.xlsx'))
+
+    # Keep only complaints that are not yet resolved
+    df_pending = final_df[final_df['status'] != 'selesai']
+
+    # ----- Compute VIKOR -----
+    f_emosi_plus = df_pending['new_emosi'].max()
+    f_emosi_min = df_pending['new_emosi'].min()
+    f_ranking_plus = df_pending['new_keyword'].max()
+    f_ranking_min = df_pending['new_keyword'].min()
+
+    emosi_denom = f_emosi_plus - f_emosi_min
+    ranking_denom = f_ranking_plus - f_ranking_min
+
+    df_pending['normalisasi_emosi'] = (f_emosi_plus - df_pending['new_emosi']) / (emosi_denom if emosi_denom != 0 else 1)
+    df_pending['normalisasi_ranking'] = (f_ranking_plus - df_pending['new_keyword']) / (ranking_denom if ranking_denom != 0 else 1)
+
+    df_pending['normalisasi_bobot_emosi'] = 0.5 * df_pending['normalisasi_emosi']
+    df_pending['normalisasi_bobot_ranking'] = 0.5 * df_pending['normalisasi_ranking']
+
+    df_pending['ultility'] = df_pending['normalisasi_bobot_emosi'] + df_pending['normalisasi_bobot_ranking']
+    df_pending['regret'] = df_pending[['normalisasi_bobot_emosi', 'normalisasi_bobot_ranking']].max(axis=1)
+
+    s_plus = df_pending['ultility'].max()
+    s_min = df_pending['ultility'].min()
+    r_plus = df_pending['regret'].max()
+    r_min = df_pending['regret'].min()
+
+    df_pending['vikor'] = 0.5 * ((df_pending['ultility'] - s_min) / (s_plus - s_min)) + \
+                          0.5 * ((df_pending['regret'] - r_min) / (r_plus - r_min))
+
+    df_pending['rank'] = df_pending['vikor'].rank(ascending=True).astype(int)
+
+    # ----- Priority complaints (10 lowest VIKOR scores, i.e. highest priority) -----
+    prioritas_df = df_pending.sort_values(by='vikor', ascending=True).head(10)
+
+    # ----- Render to template -------------------------------------------
+    return render_template(
+        'leaderboard.html',
+        keluhan_prioritas=prioritas_df
+    )
+
+@app.route('/form', methods=['GET', 'POST'])
+def form():
+    # Load dataframes
+    base_path = os.path.join('data')
+    instansi_df = pd.read_csv(os.path.join(base_path, 'mediacenter_instansi_202311220929.csv'), sep=';')
+    kecamatan_df = pd.read_csv(os.path.join(base_path, 'mediacenter_kecamatan_202311220929.csv'), sep=';')
+    kelurahan_df = pd.read_csv(os.path.join(base_path, 'mediacenter_kelurahan_202311220929.csv'), sep=';')
+    topik_df = pd.read_csv(os.path.join(base_path, 'mediacenter_topik_202311230834.csv'), sep=';')
+    # Join the frames so each kelurahan (village) row carries its kecamatan (district) name
+    kecamatan_dict = kecamatan_df.set_index('id')['name'].to_dict()
+    kelurahan_df['kecamatan_name'] = kelurahan_df['kecamatan_id'].map(kecamatan_dict)
+
+    # Build mapping: kecamatan_name -> list of kelurahan names
+    kelurahan_map = kelurahan_df.groupby('kecamatan_name')['name'].apply(list).to_dict()
+
+    message = None
+    if request.method == 'POST':
+        # Form fields
+        keluhan = request.form.get('keluhan')
+        instansi = request.form.get('instansi')
+        tanggal_keluhan = request.form.get('tanggal_keluhan')
+        kecamatan = request.form.get('kecamatan')
+        kelurahan = request.form.get('kelurahan')
+        topik = request.form.get('topik')
+
+        # Predict emotion and extract keywords
+        interpreter = load_tflite_model()
+        emosi = predict_emotion(keluhan, interpreter)
+        keywords, ranked_keywords = keyword(keluhan)
+        keywords_str = ', '.join(keywords)
+        emotion_mapping = {
+            'anger': 3,
+            'fear': 2,
+            'sadness': 1
+        }
+        new_emosi = emotion_mapping.get(emosi, emosi)
+
+        # Build the new record
+        new_data = {
+            'keluhan': keluhan,
+            'instansi': instansi,
+            'tanggal_keluhan': tanggal_keluhan,
+            'kecamatan': kecamatan,
+            'kelurahan': kelurahan,
+            'topik': topik,
+            'emosi': emosi,
+            'new_emosi': new_emosi,
+            'new_keyword': ranked_keywords,
+            'keywords': keywords_str,
+            'status': 'belum_selesai'
+        }
+
+        # Append to vikor_fix.xlsx
+        # Create the file first if it does not exist yet
+        dataset_path = os.path.join('data', 'vikor_fix.xlsx')
+        if not os.path.exists(dataset_path):
+            df = pd.DataFrame([new_data])
+        else:
+            df = pd.read_excel(dataset_path)
+            df = pd.concat([df, pd.DataFrame([new_data])], ignore_index=True)
+        df.to_excel(dataset_path, index=False)
+
+        message = "✅ Keluhan berhasil disimpan!"
+
+    # Pass dataframes to the template
+    return render_template('form.html', instansi=instansi_df,
+                           kecamatan=kecamatan_df, kelurahan=kelurahan_df,
+                           topik=topik_df, kelurahan_map=json.dumps(kelurahan_map), message=message)
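For readers unfamiliar with VIKOR, the leaderboard route above scores each pending complaint with the compromise index Q = 0.5·(S − S_min)/(S_max − S_min) + 0.5·(R − R_min)/(R_max − R_min), where S is the weighted sum of the two normalised criteria (emotion strength and keyword rank) and R is the larger of the two weighted criteria; lower Q means higher priority, which is why the route sorts ascending. A minimal self-contained sketch of the same arithmetic on made-up numbers (column names mirror the route above, the values are toy data):

    import pandas as pd

    # Toy pending complaints: new_emosi (1-3, higher = stronger emotion) and
    # new_keyword (higher = more urgent keywords), as in the route above.
    df = pd.DataFrame({
        'id': [1, 2, 3],
        'new_emosi': [3, 1, 2],
        'new_keyword': [4.0, 1.0, 2.5],
    })

    w = 0.5  # equal criterion weights, matching the 0.5 factors above

    # Normalise each criterion against its best (max) and worst (min) value:
    # 0 = best possible, 1 = worst possible.
    for col, norm in [('new_emosi', 'norm_emosi'), ('new_keyword', 'norm_keyword')]:
        best, worst = df[col].max(), df[col].min()
        denom = (best - worst) or 1  # guard against a zero range
        df[norm] = (best - df[col]) / denom

    # S (utility) = weighted sum, R (regret) = worst single weighted criterion.
    df['S'] = w * df['norm_emosi'] + w * df['norm_keyword']
    df['R'] = (w * df[['norm_emosi', 'norm_keyword']]).max(axis=1)

    # Q blends S and R; lower Q = higher priority.
    df['Q'] = 0.5 * (df['S'] - df['S'].min()) / (df['S'].max() - df['S'].min()) + \
              0.5 * (df['R'] - df['R'].min()) / (df['R'].max() - df['R'].min())

    print(df.sort_values('Q')[['id', 'S', 'R', 'Q']])  # id 1 (angriest, top keywords) ranks first

One detail worth keeping in mind for small datasets: app.py guards only the per-criterion denominators against a zero range, so when all pending complaints share the same S or R the final Q division can still divide by zero, and pandas then yields NaN or inf instead of a usable score.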
helper.py
ADDED
@@ -0,0 +1,115 @@
+import demoji
+import re
+import pandas as pd
+import numpy as np
+import nltk
+import keras
+from transformers import BertTokenizer, TFBertModel
+import tensorflow as tf
+from string import punctuation
+from keybert import KeyBERT
+from nltk.corpus import stopwords
+from sentence_transformers import SentenceTransformer
+import os
+import warnings
+warnings.filterwarnings("ignore")
+
+# --- Configuration & Global Variables ---
+MAX_LENGTH = 128
+base_path = os.path.join('data')
+model_path = os.path.join('Model')
+
+# --- Helper: Download NLTK data ---
+nltk.download('stopwords')
+
+# --- Load Resources ---
+alay_dict = pd.read_csv(os.path.join(base_path, 'kamus_alay.csv'), names=['alay', 'normal'], encoding='latin-1')
+alay_dict_map = dict(zip(alay_dict['alay'], alay_dict['normal']))
+stop_words = set(stopwords.words('indonesian'))
+tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-large-p1")
+bert_model = TFBertModel.from_pretrained("indobenchmark/indobert-large-p1")
+lstm_model = keras.models.load_model(os.path.join(model_path, 'indobert_lstm_model.keras'))
+
+# --- Preprocessing Functions ---
+def process_text(text):
+    # Uses the slang (alay) dictionary read from CSV at module level above
+    global alay_dict_map
+    text = str(text)  # Convert object to str
+    text = text.lower()  # Lowercase text
+    text = re.sub(r'\d+', '', text)  # Remove numbers
+    text = text.replace('\\n\\n\\n', ' ')
+    text = text.replace('\\n\\n', ' ')
+    text = text.replace('\\n', ' ')
+    text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)  # Remove links
+    text = re.sub(f"[{re.escape(punctuation)}]", " ", text)  # Remove punctuation
+    text = demoji.replace(text, "")  # Remove emoji
+    text = " ".join(text.split())  # Remove extra spaces, tabs, and new lines
+    text = text.split()
+    text = [alay_dict_map[word] if word in alay_dict_map else word for word in text]
+    text = ' '.join(text)
+
+    return text
+
+# --- Emotion Prediction ---
+def load_tflite_model(tflite_path="Model/indobert_lstm_model.tflite"):
+    interpreter = tf.lite.Interpreter(model_path=tflite_path)
+    interpreter.allocate_tensors()
+    return interpreter
+
+def predict_emotion(text, interpreter):
+    cleaned = process_text(text)
+    tokens = tokenizer(cleaned, return_tensors="tf", padding='max_length', truncation=True, max_length=128)
+
+    # Take all token embeddings (not just the CLS token)
+    outputs = bert_model(**tokens)
+    embeddings = outputs.last_hidden_state  # shape (1, 128, 1024)
+
+    input_data = embeddings.numpy().astype(np.float32)  # matches the TFLite input shape
+    input_details = interpreter.get_input_details()
+    output_details = interpreter.get_output_details()
+
+    interpreter.set_tensor(input_details[0]['index'], input_data)
+    interpreter.invoke()
+    output = interpreter.get_tensor(output_details[0]['index'])
+
+    label = np.argmax(output, axis=1)[0]
+    emotions = ['anger', 'fear', 'sadness']
+    return emotions[label]
+
+# --- Keyword Extraction & Ranking ---
+# Load the keyword ranking table
+df_rank_keyword = pd.read_excel(os.path.join(base_path, 'Keyword_KeyBERT.xlsx'))
+df_rank_keyword['keyword'] = df_rank_keyword['keyword'].apply(process_text)
+df_rank_keyword['new_rank'] = df_rank_keyword['rank'].max() - df_rank_keyword['rank'] + 1
+
+def rank_keywords(row):
+    total_ranking = 0
+    total_keyword = 0
+    for keyword in row:
+        frekuensi_rank = df_rank_keyword.loc[df_rank_keyword['keyword'] == keyword]
+        if not frekuensi_rank.empty:
+            total_ranking += frekuensi_rank['new_rank'].values[0]
+            total_keyword += 1
+    if total_keyword > 0:
+        return total_ranking / total_keyword
+    else:
+        return 0
+
+def keyword(text):
+    # Sentence-embedding model for KeyBERT
+    sentence_model = SentenceTransformer("denaya/indoSBERT-large", trust_remote_code=True)
+
+    # Create the KeyBERT object
+    kw_model = KeyBERT(model=sentence_model)
+
+    # Preprocess the text and extract keywords
+    stop_words = set(stopwords.words('indonesian'))
+    text = text.split()
+    text = [w for w in text if w not in stop_words]
+    text = ' '.join(text)
+    text = process_text(text)
+    keywords = kw_model.extract_keywords(text, top_n=5)
+    keyword = [keyword for keyword, _ in keywords]
+    rank = rank_keywords(keyword)
+
+    return keyword, rank
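A quick way to exercise these helpers outside Flask, mirroring what the /form route does. This is a hypothetical snippet, not part of the upload; it assumes the data/ and Model/ assets loaded at import time are present, and the first run will download the IndoBERT and indoSBERT weights from the Hub:

    # Run from the project root so data/ and Model/ resolve.
    from helper import load_tflite_model, predict_emotion, keyword

    text = "Jalan di depan rumah saya rusak parah dan belum diperbaiki"  # sample complaint

    interpreter = load_tflite_model()            # TFLite emotion classifier
    emosi = predict_emotion(text, interpreter)   # one of 'anger', 'fear', 'sadness'
    keywords, avg_rank = keyword(text)           # top-5 keywords and their average rank score

    print(emosi, keywords, avg_rank)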
requirements.txt
ADDED
@@ -0,0 +1,14 @@
+Flask
+numpy
+pandas
+wordcloud
+tensorflow==2.12
+keras==2.12
+transformers==4.52.4
+gunicorn
+nltk
+demoji
+keybert
+sentence-transformers
+scikit-learn
+regex
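One likely gap in this list: both app.py and helper.py read and write .xlsx files through pandas, which needs an Excel engine such as openpyxl at runtime. Unless it is pulled in transitively by another package, a `Missing optional dependency 'openpyxl'` error on the first read_excel call would be fixed by adding an `openpyxl` line to requirements.txt.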