mahfud00 committed
Commit bbaa525 · verified · 1 Parent(s): 798d18d

Upload 5 files

Files changed (5)
  1. Procfile +1 -0
  2. README.md +2 -12
  3. app.py +234 -0
  4. helper.py +115 -0
  5. requirements.txt +14 -0
Procfile ADDED
@@ -0,0 +1 @@
+ web: python app.py
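
requirements.txt in this same commit pins gunicorn, yet the Procfile launches the Flask development server directly. A hedged production alternative, assuming the platform injects $PORT (app:app matches the module and application names in app.py):

    web: gunicorn app:app --bind 0.0.0.0:$PORT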
README.md CHANGED
@@ -1,12 +1,2 @@
- ---
- title: Keluh Cerdas
- emoji: 👀
- colorFrom: purple
- colorTo: yellow
- sdk: docker
- pinned: false
- license: apache-2.0
- short_description: keluh-cerdas app
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Keluh-Cerdas
+ Keluh Cerdas Web
app.py ADDED
@@ -0,0 +1,234 @@
+ from flask import Flask, render_template, request, redirect, url_for
+ from datetime import datetime
+ import os, json, calendar
+ import numpy as np
+ import pandas as pd
+ from wordcloud import WordCloud
+ from helper import predict_emotion, keyword, load_tflite_model
+
+ app = Flask(__name__)
+
+ @app.route('/')
+ @app.route('/dashboard')
+ def dashboard():
+     base_path = os.path.join('data')
+     dash_df = pd.read_excel(os.path.join(base_path, 'dataset_dash.xlsx'))
+     emosi_df = pd.read_excel(os.path.join(base_path, 'final_dataset.xlsx'))
+
+     # Headline figures
+     total_keluhan = dash_df.shape[0]
+     topik_terbanyak = dash_df['Topik'].value_counts().idxmax()
+     instansi_terbanyak = dash_df['Instansi'].value_counts().idxmax()
+
+     # Normalize dates and count today's complaints
+     dash_df['tanggal_keluhan'] = pd.to_datetime(dash_df['tanggal_keluhan']).dt.normalize()
+     today = pd.to_datetime(datetime.today().date())
+     keluhan_hari_ini = dash_df[dash_df['tanggal_keluhan'] == today].shape[0]
+
+     # Daily complaint counts (last 7 days)
+     start_date = today - pd.Timedelta(days=6)
+     last_7_days_df = dash_df[(dash_df['tanggal_keluhan'] >= start_date) & (dash_df['tanggal_keluhan'] <= today)].copy()
+
+     last_7_days_df['nama_hari'] = last_7_days_df['tanggal_keluhan'].dt.day_name()
+     hari_en_to_id = {
+         'Monday': 'Senin',
+         'Tuesday': 'Selasa',
+         'Wednesday': 'Rabu',
+         'Thursday': 'Kamis',
+         'Friday': 'Jumat',
+         'Saturday': 'Sabtu',
+         'Sunday': 'Minggu'
+     }
+     last_7_days_df['nama_hari'] = last_7_days_df['nama_hari'].map(hari_en_to_id)
+     hari_urut = ['Senin', 'Selasa', 'Rabu', 'Kamis', 'Jumat', 'Sabtu', 'Minggu']
+     keluhan_per_hari = last_7_days_df.groupby('nama_hari').size().reindex(hari_urut, fill_value=0)
+     keluhan_harian_labels = keluhan_per_hari.index.tolist()
+     keluhan_harian_values = keluhan_per_hari.values.tolist()
+
+     # Emotion distribution
+     emosi_dist = emosi_df['emosi'].value_counts()
+     emosi_values = emosi_dist.values.tolist()
+
+     # Monthly complaint counts: add year/month columns, group, then sort chronologically
+     dash_df['year'] = dash_df['tanggal_keluhan'].dt.year
+     dash_df['month'] = dash_df['tanggal_keluhan'].dt.month
+
+     keluhan_bulanan = dash_df.groupby(['year', 'month']).size().reset_index()
+     keluhan_bulanan.columns = ['year', 'month', 'count']
+     keluhan_bulanan = keluhan_bulanan.sort_values(['year', 'month'])
+
+     # Month-year labels for display, e.g. "Jan 2024"
+     keluhan_bulanan['bulan_nama'] = keluhan_bulanan.apply(
+         lambda row: f"{calendar.month_abbr[row['month']]} {row['year']}",
+         axis=1
+     )
+     keluhan_bulanan_labels = keluhan_bulanan['bulan_nama'].tolist()
+     keluhan_bulanan_values = keluhan_bulanan['count'].tolist()
+
+     # ----- Top 5 topics & agencies ---------------------------------------
+     top_topik = dash_df['Topik'].value_counts().head(5).reset_index()
+     top_topik.columns = ['Topik', 'Jumlah']
+
+     top_instansi = dash_df['Instansi'].value_counts().head(5).reset_index()
+     top_instansi.columns = ['Instansi', 'Jumlah']
+
+     # ----- Word cloud -----------------------------------------------------
+     text_wc = ' '.join(dash_df['keluhan'].dropna().astype(str))
+     wc_img = WordCloud(width=800, height=400, background_color="white").generate(text_wc)
+
+     wc_path = os.path.join('static', 'wordcloud.png')
+     os.makedirs(os.path.dirname(wc_path), exist_ok=True)
+     wc_img.to_file(wc_path)  # saved as static/wordcloud.png
+
+     return render_template(
+         'dashboard.html',
+         total_keluhan=total_keluhan,
+         topik_terbanyak=topik_terbanyak,
+         instansi_terbanyak=instansi_terbanyak,
+         keluhan_hari_ini=keluhan_hari_ini,
+         keluhan_harian_labels=keluhan_harian_labels,
+         keluhan_harian_values=keluhan_harian_values,
+         emosi_labels=emosi_dist.index.tolist(),
+         emosi_values=emosi_values,
+         keluhan_bulanan_labels=keluhan_bulanan_labels,
+         keluhan_bulanan_values=keluhan_bulanan_values,
+         wordcloud_image='wordcloud.png',
+         top_topik=top_topik.itertuples(index=False),
+         top_instansi=top_instansi.itertuples(index=False),
+     )
+
+ @app.route('/ubah_status', methods=['POST'])
+ def ubah_status():
+     keluhan_id = int(request.form['id'])
+
+     base_path = os.path.join('data')
+     file_path = os.path.join(base_path, 'vikor_fix.xlsx')
+     df = pd.read_excel(file_path)
+
+     # Mark the complaint as resolved ('selesai')
+     df.loc[df['id'] == keluhan_id, 'status'] = 'selesai'
+
+     # Write the file back
+     df.to_excel(file_path, index=False)
+
+     return redirect(url_for('leaderboard'))
+
+
+ @app.route('/leaderboard')
+ def leaderboard():
+     base_path = os.path.join('data')
+     final_df = pd.read_excel(os.path.join(base_path, 'vikor_fix.xlsx'))
+
+     # Keep only unresolved complaints (copy to avoid SettingWithCopyWarning)
+     df_pending = final_df[final_df['status'] != 'selesai'].copy()
+
+     # ----- Compute VIKOR (two criteria, equal weights 0.5/0.5, v = 0.5) -----
+     f_emosi_plus = df_pending['new_emosi'].max()
+     f_emosi_min = df_pending['new_emosi'].min()
+     f_ranking_plus = df_pending['new_keyword'].max()
+     f_ranking_min = df_pending['new_keyword'].min()
+
+     emosi_denom = f_emosi_plus - f_emosi_min
+     ranking_denom = f_ranking_plus - f_ranking_min
+
+     df_pending['normalisasi_emosi'] = (f_emosi_plus - df_pending['new_emosi']) / (emosi_denom if emosi_denom != 0 else 1)
+     df_pending['normalisasi_ranking'] = (f_ranking_plus - df_pending['new_keyword']) / (ranking_denom if ranking_denom != 0 else 1)
+
+     df_pending['normalisasi_bobot_emosi'] = 0.5 * df_pending['normalisasi_emosi']
+     df_pending['normalisasi_bobot_ranking'] = 0.5 * df_pending['normalisasi_ranking']
+
+     # S (group utility) and R (individual regret)
+     df_pending['utility'] = df_pending['normalisasi_bobot_emosi'] + df_pending['normalisasi_bobot_ranking']
+     df_pending['regret'] = df_pending[['normalisasi_bobot_emosi', 'normalisasi_bobot_ranking']].max(axis=1)
+
+     s_plus = df_pending['utility'].max()
+     s_min = df_pending['utility'].min()
+     r_plus = df_pending['regret'].max()
+     r_min = df_pending['regret'].min()
+
+     # Guard the denominators the same way as above
+     s_denom = (s_plus - s_min) if s_plus != s_min else 1
+     r_denom = (r_plus - r_min) if r_plus != r_min else 1
+
+     df_pending['vikor'] = 0.5 * ((df_pending['utility'] - s_min) / s_denom) + \
+                           0.5 * ((df_pending['regret'] - r_min) / r_denom)
+
+     df_pending['rank'] = df_pending['vikor'].rank(ascending=True, method='first').astype(int)
+
+     # ----- Priority complaints (10 lowest VIKOR scores = highest priority) -----
+     prioritas_df = df_pending.sort_values(by='vikor', ascending=True).head(10)
+
+     # ----- Render template ------------------------------------------------
+     return render_template(
+         'leaderboard.html',
+         keluhan_prioritas=prioritas_df
+     )
+
+ @app.route('/form', methods=['GET', 'POST'])
+ def form():
+     # Load reference dataframes
+     base_path = os.path.join('data')
+     instansi_df = pd.read_csv(os.path.join(base_path, 'mediacenter_instansi_202311220929.csv'), sep=';')
+     kecamatan_df = pd.read_csv(os.path.join(base_path, 'mediacenter_kecamatan_202311220929.csv'), sep=';')
+     kelurahan_df = pd.read_csv(os.path.join(base_path, 'mediacenter_kelurahan_202311220929.csv'), sep=';')
+     topik_df = pd.read_csv(os.path.join(base_path, 'mediacenter_topik_202311230834.csv'), sep=';')
+     # Join so each kelurahan row carries its kecamatan name
+     kecamatan_dict = kecamatan_df.set_index('id')['name'].to_dict()
+     kelurahan_df['kecamatan_name'] = kelurahan_df['kecamatan_id'].map(kecamatan_dict)
+
+     # Build mapping: kecamatan_name -> list of kelurahan names
+     kelurahan_map = kelurahan_df.groupby('kecamatan_name')['name'].apply(list).to_dict()
+
+     message = None
+     if request.method == 'POST':
+         # Form fields
+         keluhan = request.form.get('keluhan')
+         instansi = request.form.get('instansi')
+         tanggal_keluhan = request.form.get('tanggal_keluhan')
+         kecamatan = request.form.get('kecamatan')
+         kelurahan = request.form.get('kelurahan')
+         topik = request.form.get('topik')
+
+         # Predict emotion and extract keywords
+         interpreter = load_tflite_model()
+         emosi = predict_emotion(keluhan, interpreter)
+         keywords, ranked_keywords = keyword(keluhan)
+         keywords_str = ', '.join(keywords)
+         emotion_mapping = {
+             'anger': 3,
+             'fear': 2,
+             'sadness': 1
+         }
+         new_emosi = emotion_mapping.get(emosi, emosi)
+
+         # Assemble the new record
+         new_data = {
+             'keluhan': keluhan,
+             'instansi': instansi,
+             'tanggal_keluhan': tanggal_keluhan,
+             'kecamatan': kecamatan,
+             'kelurahan': kelurahan,
+             'topik': topik,
+             'emosi': emosi,
+             'new_emosi': new_emosi,
+             'new_keyword': ranked_keywords,
+             'keywords': keywords_str,
+             'status': 'belum_selesai'
+         }
+
+         # Append to vikor_fix.xlsx, creating the file if it does not exist yet
+         dataset_path = os.path.join('data', 'vikor_fix.xlsx')
+         if not os.path.exists(dataset_path):
+             df = pd.DataFrame([new_data])
+         else:
+             df = pd.read_excel(dataset_path)
+             df = pd.concat([df, pd.DataFrame([new_data])], ignore_index=True)
+         df.to_excel(dataset_path, index=False)
+
+         message = "✅ Keluhan berhasil disimpan!"
+
+     # Pass dataframes to the template
+     return render_template('form.html', instansi=instansi_df,
+                            kecamatan=kecamatan_df, kelurahan=kelurahan_df,
+                            topik=topik_df, kelurahan_map=json.dumps(kelurahan_map), message=message)
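
Note: the 234 lines above define routes but never call app.run(), so the Procfile's "web: python app.py" would import the module and exit without serving anything. A minimal entry-point sketch; the port fallback of 7860 (the Hugging Face Spaces convention) is an assumption and not part of the commit:

    # Hypothetical entry point, not in the uploaded file
    if __name__ == '__main__':
        app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))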
helper.py ADDED
@@ -0,0 +1,115 @@
+ import demoji
+ import re
+ import pandas as pd
+ import numpy as np
+ import nltk
+ import keras
+ from transformers import BertTokenizer, TFBertModel
+ import tensorflow as tf
+ from string import punctuation
+ from keybert import KeyBERT
+ from nltk.corpus import stopwords
+ from sentence_transformers import SentenceTransformer
+ import os
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ # --- Configuration & global variables ---
+ MAX_LENGTH = 128
+ base_path = os.path.join('data')
+ model_path = os.path.join('Model')
+
+ # --- Download NLTK data ---
+ nltk.download('stopwords')
+
+ # --- Load resources (once, at import time) ---
+ alay_dict = pd.read_csv(os.path.join(base_path, 'kamus_alay.csv'), names=['alay', 'normal'], encoding='latin-1')
+ alay_dict_map = dict(zip(alay_dict['alay'], alay_dict['normal']))
+ stop_words = set(stopwords.words('indonesian'))
+ tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-large-p1")
+ bert_model = TFBertModel.from_pretrained("indobenchmark/indobert-large-p1")
+ lstm_model = keras.models.load_model(os.path.join(model_path, 'indobert_lstm_model.keras'))  # unused here; inference goes through the TFLite export below
+
+ # --- Preprocessing ---
+ def process_text(text):
+     # Normalize raw complaint text: lowercase, strip numbers/links/punctuation/emoji,
+     # then map slang ("alay") words to their normal forms via kamus_alay.csv
+     text = str(text)
+     text = text.lower()
+     text = re.sub(r'\d+', '', text)  # remove numbers
+     text = text.replace('\\n\\n\\n', ' ')  # scraped data contains literal "\n" sequences
+     text = text.replace('\\n\\n', ' ')
+     text = text.replace('\\n', ' ')
+     text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)  # remove links
+     text = re.sub(f"[{re.escape(punctuation)}]", " ", text)  # remove punctuation
+     text = demoji.replace(text, "")  # remove emoji
+     text = " ".join(text.split())  # collapse extra spaces, tabs, and newlines
+     words = text.split()
+     words = [alay_dict_map.get(word, word) for word in words]
+     return ' '.join(words)
+
+ # --- Emotion prediction ---
+ def load_tflite_model(tflite_path="Model/indobert_lstm_model.tflite"):
+     interpreter = tf.lite.Interpreter(model_path=tflite_path)
+     interpreter.allocate_tensors()
+     return interpreter
+
+ def predict_emotion(text, interpreter):
+     cleaned = process_text(text)
+     tokens = tokenizer(cleaned, return_tensors="tf", padding='max_length', truncation=True, max_length=MAX_LENGTH)
+
+     # Take the full sequence of token embeddings (not just the CLS token)
+     outputs = bert_model(**tokens)
+     embeddings = outputs.last_hidden_state  # shape (1, 128, 1024)
+
+     input_data = embeddings.numpy().astype(np.float32)  # matches the TFLite input shape
+     input_details = interpreter.get_input_details()
+     output_details = interpreter.get_output_details()
+
+     interpreter.set_tensor(input_details[0]['index'], input_data)
+     interpreter.invoke()
+     output = interpreter.get_tensor(output_details[0]['index'])
+
+     label = np.argmax(output, axis=1)[0]
+     emotions = ['anger', 'fear', 'sadness']
+     return emotions[label]
+
+ # --- Keyword extraction & ranking ---
+ # Load the precomputed keyword ranking and invert it so a higher
+ # new_rank means a more frequent keyword
+ df_rank_keyword = pd.read_excel(os.path.join(base_path, 'Keyword_KeyBERT.xlsx'))
+ df_rank_keyword['keyword'] = df_rank_keyword['keyword'].apply(process_text)
+ df_rank_keyword['new_rank'] = df_rank_keyword['rank'].max() - df_rank_keyword['rank'] + 1
+
+ # KeyBERT over an Indonesian SBERT backbone, loaded once at import time
+ sentence_model = SentenceTransformer("denaya/indoSBERT-large", trust_remote_code=True)
+ kw_model = KeyBERT(model=sentence_model)
+
+ def rank_keywords(row):
+     # Average the inverted ranks of the keywords found in the reference table
+     total_ranking = 0
+     total_keyword = 0
+     for kw in row:
+         frekuensi_rank = df_rank_keyword.loc[df_rank_keyword['keyword'] == kw]
+         if not frekuensi_rank.empty:
+             total_ranking += frekuensi_rank['new_rank'].values[0]
+             total_keyword += 1
+     if total_keyword > 0:
+         return total_ranking / total_keyword
+     return 0
+
+ def keyword(text):
+     # Drop stopwords and normalize, then extract the top-5 keywords and their mean rank
+     words = [w for w in text.split() if w not in stop_words]
+     text = process_text(' '.join(words))
+     keywords = kw_model.extract_keywords(text, top_n=5)
+     extracted = [kw for kw, _ in keywords]
+     rank = rank_keywords(extracted)
+
+     return extracted, rank
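
The three public helpers compose exactly as app.py's /form route wires them together; a standalone usage sketch (the complaint text below is illustrative only):

    # Usage sketch, mirroring app.py's /form route
    interpreter = load_tflite_model()                      # TFLite emotion classifier
    emosi = predict_emotion('jalan di depan sekolah rusak parah', interpreter)
    kws, rank = keyword('jalan di depan sekolah rusak parah')
    print(emosi, kws, rank)                                # e.g. 'anger', top-5 keywords, mean keyword rank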
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ Flask
+ numpy
+ pandas
+ wordcloud
+ tensorflow==2.12
+ keras==2.12
+ transformers==4.52.4
+ gunicorn
+ nltk
+ demoji
+ keybert
+ sentence-transformers
+ scikit-learn
+ regex
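
tensorflow==2.12 already pulls in a matching keras 2.12 as a dependency, so the explicit keras pin mainly guards against accidental upgrades. A quick post-install sanity check (a convenience one-liner, not part of the commit):

    python -c "import tensorflow as tf, keras; print(tf.__version__, keras.__version__)"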