rizkims committed
Commit a60f537 · Parent: c2b6622

Initial commit for hoax detector

Files changed (4)
  1. app.py +72 -0
  2. ensemble_model.pkl +3 -0
  3. requirements.txt +5 -0
  4. vectorizer.pkl +3 -0
app.py ADDED
@@ -0,0 +1,72 @@
+ import gradio as gr
+ import pickle
+ from transformers import pipeline
+ import re
+ import unicodedata
+
+ # Load Hugging Face pipelines for QA and NER
+ qa_pipeline = pipeline("question-answering", model="Rifky/Indobert-QA", tokenizer="Rifky/Indobert-QA")
+ ner_pipeline = pipeline("ner", model="cahya/bert-base-indonesian-NER", tokenizer="cahya/bert-base-indonesian-NER", grouped_entities=True)
+
+ # Load the hoax detection model and its TF-IDF vectorizer
+ with open("ensemble_model.pkl", "rb") as f:
+     model = pickle.load(f)
+
+ with open("vectorizer.pkl", "rb") as f:
+     vectorizer = pickle.load(f)
+
+ def clean_text(text):
+     text = re.sub(r'[\n\r]+', ' ', text)
+     text = re.sub(r'\s{2,}', ' ', text)
+     text = text.strip()
+     text = unicodedata.normalize('NFKC', text)
+     text = text.lower()
+     text = re.sub(r'https?://\S+|www\.\S+', ' url ', text)
+     asian_char_pattern = re.compile(
+         r'[\u4e00-\u9FFF\u30A0-\u30FF\u3040-\u309F\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]'
+     )
+     text = asian_char_pattern.sub(' ', text)
+     unwanted_scripts_pattern = re.compile(
+         r'[\u2D30-\u2D7F\uA980-\uA9DF\u1E00-\u1EFF\u0250-\u02AF\u1D00-\u1D7F]'
+     )
+     text = ' '.join(word for word in text.split() if not unwanted_scripts_pattern.search(word))
+     text = re.sub(r'[^a-z0-9\s.,!?;:\'\"()-]', ' ', text)
+     return re.sub(r'\s{2,}', ' ', text).strip()
+
+ # === Main functions ===
+ def detect_hoax(text):
+     cleaned = clean_text(text)
+     tfidf = vectorizer.transform([cleaned])
+     prediction = model.predict(tfidf)[0]
+     return "Hoaks" if prediction == 1 else "Bukan Hoaks"
+
+ def run_qa(context, question):
+     if not context or not question:
+         return "Masukkan context dan pertanyaan."
+     result = qa_pipeline(question=question, context=context)
+     return result["answer"]
+
+ def run_ner(text):
+     if not text:
+         return []
+     result = ner_pipeline(text)
+     return [(ent["word"], ent["entity_group"]) for ent in result]
+
+ # === Gradio UI ===
+ hoax_tab = gr.Interface(fn=detect_hoax, inputs="text", outputs="text", title="Deteksi Hoaks")
+
+ qa_tab = gr.Interface(
+     fn=run_qa,
+     inputs=[gr.Textbox(label="Context"), gr.Textbox(label="Pertanyaan")],
+     outputs="text",
+     title="Question Answering"
+ )
+
+ ner_tab = gr.Interface(
+     fn=run_ner,
+     inputs="text",
+     outputs=gr.HighlightedText(label="Hasil NER", combine_adjacent=True),
+     title="Named Entity Recognition"
+ )
+
+ gr.TabbedInterface([hoax_tab, qa_tab, ner_tab], ["Deteksi Hoaks", "QA", "NER"]).launch()
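The inference path above is: clean_text() normalizes the input (URLs become a "url" token, non-Latin scripts are dropped, only basic Latin characters and punctuation remain), the pickled TF-IDF vectorizer turns the cleaned text into features, and the pickled ensemble predicts 1 for "Hoaks". A minimal standalone sketch of that path, assuming ensemble_model.pkl and vectorizer.pkl sit in the working directory (the sample headline is made up for illustration):

import pickle
import re

# Load the same artifacts that app.py loads at startup.
with open("vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)
with open("ensemble_model.pkl", "rb") as f:
    model = pickle.load(f)

# Lightweight stand-in for clean_text(): lowercase and mask URLs.
# app.py additionally strips non-Latin scripts before vectorizing.
sample = "Pemerintah bagikan uang tunai lewat tautan www.contoh-hadiah.com"  # illustrative Indonesian headline
cleaned = re.sub(r'https?://\S+|www\.\S+', ' url ', sample.lower()).strip()

features = vectorizer.transform([cleaned])  # sparse TF-IDF features
label = model.predict(features)[0]          # ensemble prediction: 1 = hoax
print("Hoaks" if label == 1 else "Bukan Hoaks")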
ensemble_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:354288cdacff965e08c0de8dff13282f64f5c546b30d709d00611ca10e7d2d39
+ size 599691306
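Both .pkl entries are Git LFS pointer files rather than the serialized objects themselves: each records the pointer spec version, the SHA-256 object id, and the byte size, and Git LFS swaps in the real binary at checkout. A small sketch of reading such a pointer (parse_lfs_pointer is a hypothetical helper, and it only applies while the file on disk is still a pointer, e.g. after cloning with GIT_LFS_SKIP_SMUDGE=1):

def parse_lfs_pointer(path):
    # Each pointer line has the form "<key> <value>".
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

info = parse_lfs_pointer("ensemble_model.pkl")
print(info["oid"], int(info["size"]))  # sha256:3542... 599691306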
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio
+ scikit-learn
+ transformers
+ torch
+ regex
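The requirements are unpinned package names. Note that app.py imports the standard-library re module rather than the third-party regex package, so the regex entry presumably backs the transformers tokenizer stack. A quick sanity-check sketch that an environment satisfies the list (version printing is only illustrative):

# scikit-learn installs under the sklearn import name.
import gradio, sklearn, transformers, torch, regex

for mod in (gradio, sklearn, transformers, torch, regex):
    print(mod.__name__, getattr(mod, "__version__", "unknown"))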
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cede0d09e18bc5cfb31d36f2b38fe1635f20bb48fcfc34f1c01fe0bea9183c3f
+ size 3180887