mahfud00 committed
Commit bbaa525 · verified · 1 Parent(s): 798d18d

Upload 5 files

Files changed (5)
  1. Procfile +1 -0
  2. README.md +2 -12
  3. app.py +234 -0
  4. helper.py +115 -0
  5. requirements.txt +14 -0
Procfile ADDED
@@ -0,0 +1 @@
+ web: python app.py
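
requirements.txt in this same commit pins gunicorn, yet the Procfile launches the Flask development server directly. A hedged production alternative, assuming the platform injects $PORT (app:app matches the module and application names in app.py):

    web: gunicorn app:app --bind 0.0.0.0:$PORT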
README.md CHANGED
@@ -1,12 +1,2 @@
- ---
- title: Keluh Cerdas
- emoji: 👀
- colorFrom: purple
- colorTo: yellow
- sdk: docker
- pinned: false
- license: apache-2.0
- short_description: keluh-cerdas app
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Keluh-Cerdas
+ Keluh Cerdas Web
app.py ADDED
@@ -0,0 +1,234 @@
+ from flask import Flask, render_template, request, redirect, url_for
+ from datetime import datetime
+ import os, json, calendar
+ import numpy as np
+ import pandas as pd
+ from wordcloud import WordCloud
+ from helper import predict_emotion, keyword, load_tflite_model
+
+ app = Flask(__name__)
+
+ @app.route('/')
+ @app.route('/dashboard')
+ def dashboard():
+     base_path = os.path.join('data')
+     dash_df = pd.read_excel(os.path.join(base_path, 'dataset_dash.xlsx'))
+     emosi_df = pd.read_excel(os.path.join(base_path, 'final_dataset.xlsx'))
+
+     # Headline figures
+     total_keluhan = dash_df.shape[0]
+     topik_terbanyak = dash_df['Topik'].value_counts().idxmax()
+     instansi_terbanyak = dash_df['Instansi'].value_counts().idxmax()
+
+     # Normalize dates and count today's complaints
+     dash_df['tanggal_keluhan'] = pd.to_datetime(dash_df['tanggal_keluhan']).dt.normalize()
+     today = pd.to_datetime(datetime.today().date())
+     keluhan_hari_ini = dash_df[dash_df['tanggal_keluhan'] == today].shape[0]
+
+     # Daily complaint counts (last 7 days)
+     start_date = today - pd.Timedelta(days=6)
+     last_7_days_df = dash_df[(dash_df['tanggal_keluhan'] >= start_date) & (dash_df['tanggal_keluhan'] <= today)].copy()
+
+     last_7_days_df['nama_hari'] = last_7_days_df['tanggal_keluhan'].dt.day_name()
+     hari_en_to_id = {
+         'Monday': 'Senin',
+         'Tuesday': 'Selasa',
+         'Wednesday': 'Rabu',
+         'Thursday': 'Kamis',
+         'Friday': 'Jumat',
+         'Saturday': 'Sabtu',
+         'Sunday': 'Minggu'
+     }
+     last_7_days_df['nama_hari'] = last_7_days_df['nama_hari'].map(hari_en_to_id)
+     hari_urut = ['Senin', 'Selasa', 'Rabu', 'Kamis', 'Jumat', 'Sabtu', 'Minggu']
+     keluhan_per_hari = last_7_days_df.groupby('nama_hari').size().reindex(hari_urut, fill_value=0)
+     keluhan_harian_labels = keluhan_per_hari.index.tolist()
+     keluhan_harian_values = keluhan_per_hari.values.tolist()
+
+     # Emotion distribution
+     emosi_dist = emosi_df['emosi'].value_counts()
+     emosi_values = emosi_dist.values.tolist()
+
+     # Monthly complaint counts: add year/month columns, group, then sort chronologically
+     dash_df['year'] = dash_df['tanggal_keluhan'].dt.year
+     dash_df['month'] = dash_df['tanggal_keluhan'].dt.month
+
+     keluhan_bulanan = dash_df.groupby(['year', 'month']).size().reset_index()
+     keluhan_bulanan.columns = ['year', 'month', 'count']
+     keluhan_bulanan = keluhan_bulanan.sort_values(['year', 'month'])
+
+     # Month-year labels for display, e.g. "Jan 2024"
+     keluhan_bulanan['bulan_nama'] = keluhan_bulanan.apply(
+         lambda row: f"{calendar.month_abbr[row['month']]} {row['year']}",
+         axis=1
+     )
+     keluhan_bulanan_labels = keluhan_bulanan['bulan_nama'].tolist()
+     keluhan_bulanan_values = keluhan_bulanan['count'].tolist()
+
+     # ----- Top 5 topics & agencies ---------------------------------------
+     top_topik = dash_df['Topik'].value_counts().head(5).reset_index()
+     top_topik.columns = ['Topik', 'Jumlah']
+
+     top_instansi = dash_df['Instansi'].value_counts().head(5).reset_index()
+     top_instansi.columns = ['Instansi', 'Jumlah']
+
+     # ----- Word cloud -----------------------------------------------------
+     text_wc = ' '.join(dash_df['keluhan'].dropna().astype(str))
+     wc_img = WordCloud(width=800, height=400, background_color="white").generate(text_wc)
+
+     wc_path = os.path.join('static', 'wordcloud.png')
+     os.makedirs(os.path.dirname(wc_path), exist_ok=True)
+     wc_img.to_file(wc_path)  # saved as static/wordcloud.png
+
+     return render_template(
+         'dashboard.html',
+         total_keluhan=total_keluhan,
+         topik_terbanyak=topik_terbanyak,
+         instansi_terbanyak=instansi_terbanyak,
+         keluhan_hari_ini=keluhan_hari_ini,
+         keluhan_harian_labels=keluhan_harian_labels,
+         keluhan_harian_values=keluhan_harian_values,
+         emosi_labels=emosi_dist.index.tolist(),
+         emosi_values=emosi_values,
+         keluhan_bulanan_labels=keluhan_bulanan_labels,
+         keluhan_bulanan_values=keluhan_bulanan_values,
+         wordcloud_image='wordcloud.png',
+         top_topik=top_topik.itertuples(index=False),
+         top_instansi=top_instansi.itertuples(index=False),
+     )
+
+ @app.route('/ubah_status', methods=['POST'])
+ def ubah_status():
+     keluhan_id = int(request.form['id'])
+
+     base_path = os.path.join('data')
+     file_path = os.path.join(base_path, 'vikor_fix.xlsx')
+     df = pd.read_excel(file_path)
+
+     # Mark the complaint as resolved ('selesai')
+     df.loc[df['id'] == keluhan_id, 'status'] = 'selesai'
+
+     # Write the file back
+     df.to_excel(file_path, index=False)
+
+     return redirect(url_for('leaderboard'))
+
+
+ @app.route('/leaderboard')
+ def leaderboard():
+     base_path = os.path.join('data')
+     final_df = pd.read_excel(os.path.join(base_path, 'vikor_fix.xlsx'))
+
+     # Keep only unresolved complaints (copy to avoid SettingWithCopyWarning)
+     df_pending = final_df[final_df['status'] != 'selesai'].copy()
+
+     # ----- Compute VIKOR (two criteria, equal weights 0.5/0.5, v = 0.5) -----
+     f_emosi_plus = df_pending['new_emosi'].max()
+     f_emosi_min = df_pending['new_emosi'].min()
+     f_ranking_plus = df_pending['new_keyword'].max()
+     f_ranking_min = df_pending['new_keyword'].min()
+
+     emosi_denom = f_emosi_plus - f_emosi_min
+     ranking_denom = f_ranking_plus - f_ranking_min
+
+     df_pending['normalisasi_emosi'] = (f_emosi_plus - df_pending['new_emosi']) / (emosi_denom if emosi_denom != 0 else 1)
+     df_pending['normalisasi_ranking'] = (f_ranking_plus - df_pending['new_keyword']) / (ranking_denom if ranking_denom != 0 else 1)
+
+     df_pending['normalisasi_bobot_emosi'] = 0.5 * df_pending['normalisasi_emosi']
+     df_pending['normalisasi_bobot_ranking'] = 0.5 * df_pending['normalisasi_ranking']
+
+     # S (group utility) and R (individual regret)
+     df_pending['utility'] = df_pending['normalisasi_bobot_emosi'] + df_pending['normalisasi_bobot_ranking']
+     df_pending['regret'] = df_pending[['normalisasi_bobot_emosi', 'normalisasi_bobot_ranking']].max(axis=1)
+
+     s_plus = df_pending['utility'].max()
+     s_min = df_pending['utility'].min()
+     r_plus = df_pending['regret'].max()
+     r_min = df_pending['regret'].min()
+
+     # Guard the denominators the same way as above
+     s_denom = (s_plus - s_min) if s_plus != s_min else 1
+     r_denom = (r_plus - r_min) if r_plus != r_min else 1
+
+     df_pending['vikor'] = 0.5 * ((df_pending['utility'] - s_min) / s_denom) + \
+                           0.5 * ((df_pending['regret'] - r_min) / r_denom)
+
+     df_pending['rank'] = df_pending['vikor'].rank(ascending=True, method='first').astype(int)
+
+     # ----- Priority complaints (10 lowest VIKOR scores = highest priority) -----
+     prioritas_df = df_pending.sort_values(by='vikor', ascending=True).head(10)
+
+     # ----- Render template ------------------------------------------------
+     return render_template(
+         'leaderboard.html',
+         keluhan_prioritas=prioritas_df
+     )
+
+ @app.route('/form', methods=['GET', 'POST'])
+ def form():
+     # Load reference dataframes
+     base_path = os.path.join('data')
+     instansi_df = pd.read_csv(os.path.join(base_path, 'mediacenter_instansi_202311220929.csv'), sep=';')
+     kecamatan_df = pd.read_csv(os.path.join(base_path, 'mediacenter_kecamatan_202311220929.csv'), sep=';')
+     kelurahan_df = pd.read_csv(os.path.join(base_path, 'mediacenter_kelurahan_202311220929.csv'), sep=';')
+     topik_df = pd.read_csv(os.path.join(base_path, 'mediacenter_topik_202311230834.csv'), sep=';')
+     # Join so each kelurahan row carries its kecamatan name
+     kecamatan_dict = kecamatan_df.set_index('id')['name'].to_dict()
+     kelurahan_df['kecamatan_name'] = kelurahan_df['kecamatan_id'].map(kecamatan_dict)
+
+     # Build mapping: kecamatan_name -> list of kelurahan names
+     kelurahan_map = kelurahan_df.groupby('kecamatan_name')['name'].apply(list).to_dict()
+
+     message = None
+     if request.method == 'POST':
+         # Form fields
+         keluhan = request.form.get('keluhan')
+         instansi = request.form.get('instansi')
+         tanggal_keluhan = request.form.get('tanggal_keluhan')
+         kecamatan = request.form.get('kecamatan')
+         kelurahan = request.form.get('kelurahan')
+         topik = request.form.get('topik')
+
+         # Predict emotion and extract keywords
+         interpreter = load_tflite_model()
+         emosi = predict_emotion(keluhan, interpreter)
+         keywords, ranked_keywords = keyword(keluhan)
+         keywords_str = ', '.join(keywords)
+         emotion_mapping = {
+             'anger': 3,
+             'fear': 2,
+             'sadness': 1
+         }
+         new_emosi = emotion_mapping.get(emosi, emosi)
+
+         # Assemble the new record
+         new_data = {
+             'keluhan': keluhan,
+             'instansi': instansi,
+             'tanggal_keluhan': tanggal_keluhan,
+             'kecamatan': kecamatan,
+             'kelurahan': kelurahan,
+             'topik': topik,
+             'emosi': emosi,
+             'new_emosi': new_emosi,
+             'new_keyword': ranked_keywords,
+             'keywords': keywords_str,
+             'status': 'belum_selesai'
+         }
+
+         # Append to vikor_fix.xlsx, creating the file if it does not exist yet
+         dataset_path = os.path.join('data', 'vikor_fix.xlsx')
+         if not os.path.exists(dataset_path):
+             df = pd.DataFrame([new_data])
+         else:
+             df = pd.read_excel(dataset_path)
+             df = pd.concat([df, pd.DataFrame([new_data])], ignore_index=True)
+         df.to_excel(dataset_path, index=False)
+
+         message = "✅ Keluhan berhasil disimpan!"
+
+     # Pass dataframes to the template
+     return render_template('form.html', instansi=instansi_df,
+                            kecamatan=kecamatan_df, kelurahan=kelurahan_df,
+                            topik=topik_df, kelurahan_map=json.dumps(kelurahan_map), message=message)
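
Note: the 234 lines above define routes but never call app.run(), so the Procfile's "web: python app.py" would import the module and exit without serving anything. A minimal entry-point sketch; the port fallback of 7860 (the Hugging Face Spaces convention) is an assumption and not part of the commit:

    # Hypothetical entry point, not in the uploaded file
    if __name__ == '__main__':
        app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))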
helper.py ADDED
@@ -0,0 +1,115 @@
+ import demoji
+ import re
+ import pandas as pd
+ import numpy as np
+ import nltk
+ import keras
+ from transformers import BertTokenizer, TFBertModel
+ import tensorflow as tf
+ from string import punctuation
+ from keybert import KeyBERT
+ from nltk.corpus import stopwords
+ from sentence_transformers import SentenceTransformer
+ import os
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ # --- Configuration & global variables ---
+ MAX_LENGTH = 128
+ base_path = os.path.join('data')
+ model_path = os.path.join('Model')
+
+ # --- Download NLTK data ---
+ nltk.download('stopwords')
+
+ # --- Load resources (once, at import time) ---
+ alay_dict = pd.read_csv(os.path.join(base_path, 'kamus_alay.csv'), names=['alay', 'normal'], encoding='latin-1')
+ alay_dict_map = dict(zip(alay_dict['alay'], alay_dict['normal']))
+ stop_words = set(stopwords.words('indonesian'))
+ tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-large-p1")
+ bert_model = TFBertModel.from_pretrained("indobenchmark/indobert-large-p1")
+ lstm_model = keras.models.load_model(os.path.join(model_path, 'indobert_lstm_model.keras'))  # unused here; inference goes through the TFLite export below
+
+ # --- Preprocessing ---
+ def process_text(text):
+     # Normalize raw complaint text: lowercase, strip numbers/links/punctuation/emoji,
+     # then map slang ("alay") words to their normal forms via kamus_alay.csv
+     text = str(text)
+     text = text.lower()
+     text = re.sub(r'\d+', '', text)  # remove numbers
+     text = text.replace('\\n\\n\\n', ' ')  # scraped data contains literal "\n" sequences
+     text = text.replace('\\n\\n', ' ')
+     text = text.replace('\\n', ' ')
+     text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)  # remove links
+     text = re.sub(f"[{re.escape(punctuation)}]", " ", text)  # remove punctuation
+     text = demoji.replace(text, "")  # remove emoji
+     text = " ".join(text.split())  # collapse extra spaces, tabs, and newlines
+     words = text.split()
+     words = [alay_dict_map.get(word, word) for word in words]
+     return ' '.join(words)
+
+ # --- Emotion prediction ---
+ def load_tflite_model(tflite_path="Model/indobert_lstm_model.tflite"):
+     interpreter = tf.lite.Interpreter(model_path=tflite_path)
+     interpreter.allocate_tensors()
+     return interpreter
+
+ def predict_emotion(text, interpreter):
+     cleaned = process_text(text)
+     tokens = tokenizer(cleaned, return_tensors="tf", padding='max_length', truncation=True, max_length=MAX_LENGTH)
+
+     # Take the full sequence of token embeddings (not just the CLS token)
+     outputs = bert_model(**tokens)
+     embeddings = outputs.last_hidden_state  # shape (1, 128, 1024)
+
+     input_data = embeddings.numpy().astype(np.float32)  # matches the TFLite input shape
+     input_details = interpreter.get_input_details()
+     output_details = interpreter.get_output_details()
+
+     interpreter.set_tensor(input_details[0]['index'], input_data)
+     interpreter.invoke()
+     output = interpreter.get_tensor(output_details[0]['index'])
+
+     label = np.argmax(output, axis=1)[0]
+     emotions = ['anger', 'fear', 'sadness']
+     return emotions[label]
+
+ # --- Keyword extraction & ranking ---
+ # Load the precomputed keyword ranking and invert it so a higher
+ # new_rank means a more frequent keyword
+ df_rank_keyword = pd.read_excel(os.path.join(base_path, 'Keyword_KeyBERT.xlsx'))
+ df_rank_keyword['keyword'] = df_rank_keyword['keyword'].apply(process_text)
+ df_rank_keyword['new_rank'] = df_rank_keyword['rank'].max() - df_rank_keyword['rank'] + 1
+
+ # KeyBERT over an Indonesian SBERT backbone, loaded once at import time
+ sentence_model = SentenceTransformer("denaya/indoSBERT-large", trust_remote_code=True)
+ kw_model = KeyBERT(model=sentence_model)
+
+ def rank_keywords(row):
+     # Average the inverted ranks of the keywords found in the reference table
+     total_ranking = 0
+     total_keyword = 0
+     for kw in row:
+         frekuensi_rank = df_rank_keyword.loc[df_rank_keyword['keyword'] == kw]
+         if not frekuensi_rank.empty:
+             total_ranking += frekuensi_rank['new_rank'].values[0]
+             total_keyword += 1
+     if total_keyword > 0:
+         return total_ranking / total_keyword
+     return 0
+
+ def keyword(text):
+     # Drop stopwords and normalize, then extract the top-5 keywords and their mean rank
+     words = [w for w in text.split() if w not in stop_words]
+     text = process_text(' '.join(words))
+     keywords = kw_model.extract_keywords(text, top_n=5)
+     extracted = [kw for kw, _ in keywords]
+     rank = rank_keywords(extracted)
+
+     return extracted, rank
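
The three public helpers compose exactly as app.py's /form route wires them together; a standalone usage sketch (the complaint text below is illustrative only):

    # Usage sketch, mirroring app.py's /form route
    interpreter = load_tflite_model()                      # TFLite emotion classifier
    emosi = predict_emotion('jalan di depan sekolah rusak parah', interpreter)
    kws, rank = keyword('jalan di depan sekolah rusak parah')
    print(emosi, kws, rank)                                # e.g. 'anger', top-5 keywords, mean keyword rank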
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ Flask
+ numpy
+ pandas
+ wordcloud
+ tensorflow==2.12
+ keras==2.12
+ transformers==4.52.4
+ gunicorn
+ nltk
+ demoji
+ keybert
+ sentence-transformers
+ scikit-learn
+ regex
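
tensorflow==2.12 already pulls in a matching keras 2.12 as a dependency, so the explicit keras pin mainly guards against accidental upgrades. A quick post-install sanity check (a convenience one-liner, not part of the commit):

    python -c "import tensorflow as tf, keras; print(tf.__version__, keras.__version__)"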