File size: 6,762 Bytes
c1997df
 
ba294cc
c1997df
bd35ac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef4e241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba294cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef4e241
cd5d98f
ef4e241
 
 
 
 
9b60cd8
19be099
 
cd5d98f
ef4e241
19be099
ba294cc
e649907
ef4e241
d4cee6d
28faf01
2add840
d4cee6d
 
dc07e95
d4cee6d
ba294cc
358fb0c
ba294cc
 
dc07e95
bd35ac7
a0c2f3c
ba294cc
358fb0c
 
 
 
 
 
 
 
 
09373c9
dc07e95
ba294cc
dc07e95
ba294cc
a0c2f3c
ba294cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0c2f3c
ba294cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358fb0c
19be099
d4cee6d
358fb0c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import gradio as gr
from transformers import pipeline
import csv, tempfile

# ---------- Pipelines ----------
# Hugging Face text-classification pipelines, loaded once at import time
# (module import therefore downloads/loads both models).
# NOTE(review): `return_all_scores` is deprecated in recent transformers
# releases in favour of `top_k` -- confirm the installed version still
# accepts it before upgrading.

# Arabic hate-speech classifier; returns a single top prediction per input
# as {"label": ..., "score": ...} (consumed by analyze() below).
hate = pipeline(
    "text-classification",
    model="hossam87/bert-base-arabic-hate-speech",
    tokenizer="hossam87/bert-base-arabic-hate-speech",
    return_all_scores=False
)

# Arabic written-dialect classifier (labels such as MSA/EGY/GLF/LEV/MAGHREB,
# per the abbreviation guide rendered in the sidebar).
dialect = pipeline(
    "text-classification",
    model="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    tokenizer="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    return_all_scores=False
)

# ---------- Inference ----------
def analyze(text: str):
    """Classify Arabic text for hate speech and dialect; derive a threat score.

    Args:
        text: raw Arabic text from the UI textbox (may be empty/whitespace).

    Returns:
        A 6-tuple of strings:
        (hate label, hate confidence, dialect label, dialect confidence,
         threat score, recommended action).
        On empty input or any model failure, the first five slots are ""
        and the last slot carries the message, so the Gradio UI never crashes.
    """
    try:
        if not text or not text.strip():
            return ("", "", "", "", "", "Please enter some Arabic text.")

        # Each pipeline returns a one-element list of {"label", "score"}.
        h = hate(text)[0]
        d = dialect(text)[0]

        hate_label = h.get("label", "")
        hate_conf = float(h.get("score", 0.0))
        dial_label = d.get("label", "")
        dial_conf = float(d.get("score", 0.0))

        # Severity weight per label; unknown labels default to 0.0.
        weights = {
            "Neutral": 0.0,
            "Offensive": 0.5,
            "Sexism": 1.0,
            "Racism": 1.0,
            "Religious Discrimination": 1.0,
        }
        weight = weights.get(hate_label, 0.0)
        score = hate_conf * weight  # confidence-scaled threat score

        # Rule-based recommended action.
        # BUG FIX: the original third branch (`elif score >= 0.49`) was
        # unreachable -- it was only evaluated when the label was Neutral,
        # where the weight (and therefore the score) is always 0.0.
        # "Borderline" now fires when the model says Neutral but with low
        # confidence, i.e. the classifier itself is unsure.
        # (Also repaired mojibake emoji in the action strings.)
        if hate_label != "Neutral" and weight >= 1.0:
            action = "🚨 Immediate Review — Severe content detected. Escalate to moderators."
        elif hate_label != "Neutral":
            action = "⚠️ Potentially Harmful — Contains offensive content. Please review."
        elif hate_conf < 0.5:
            action = "⚠️ Borderline — Review recommended."
        else:
            action = "✅ Safe — No action needed."

        return (hate_label, f"{hate_conf:.2f}", dial_label, f"{dial_conf:.2f}", f"{score:.2f}", action)

    except Exception as e:
        # Keep the UI alive even if a model throws.
        return ("", "", "", "", "", f"Runtime error: {e}")

def analyze_file(file):
    """Batch-analyze an uploaded .txt file, one entry per line.

    Args:
        file: value produced by ``gr.File`` -- either an object exposing a
            ``.name`` path attribute (older Gradio) or a plain filepath
            string (newer Gradio versions), or None if nothing was uploaded.

    Returns:
        (rows, csv_path, status): ``rows`` feeds the results Dataframe,
        ``csv_path`` is a downloadable CSV path (or None on failure) and
        ``status`` is a human-readable message.
    """
    if file is None:
        return [], None, "Please upload a .txt file."

    try:
        # Accept both a filepath string and a file-like object with .name.
        path = file if isinstance(file, str) else file.name

        # Read the upload (utf-8, ignoring undecodable bytes); the context
        # manager guarantees the handle is closed (the original leaked it).
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            lines = [ln.strip() for ln in fh.read().splitlines() if ln.strip()]

        rows = []
        headers = ["#", "Text (truncated)", "Hate Label", "Label Conf.",
                   "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"]

        # Process up to 200 lines to keep the UI responsive.
        for i, ln in enumerate(lines[:200], start=1):
            hate_label, hate_conf, dial_label, dial_conf, score, action = analyze(ln)
            rows.append([
                i,
                (ln[:80] + "…") if len(ln) > 80 else ln,
                hate_label, hate_conf, dial_label, dial_conf, score, action
            ])

        # Write the CSV to a temp file for download.  Close the handle
        # before reopening by name: reopening a still-open
        # NamedTemporaryFile fails on Windows.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        tmp.close()
        with open(tmp.name, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(rows)

        status = f"Processed {len(rows)} lines."
        return rows, tmp.name, status

    except Exception as e:
        return [], None, f"Error reading file: {e}"
        
# ---------- CSS (sidebar only) ----------
# Injected via gr.Blocks(css=...) below; scoped to elem_id="sidebar-box" so
# the default Gradio theme for the main area is left untouched.
CSS = """
/* Only style the sidebar box; leave main area/theme untouched */
#sidebar-box {
  border: 1px solid rgba(255,255,255,0.08);
  border-radius: 10px;
  padding: 20px;
}
"""

# ---------- UI ----------
# Two-column layout: a styled sidebar with usage notes, and a tabbed main
# area (single-text analysis / batch .txt upload).
# BUG FIX: repaired mojibake in user-facing strings -- the sidebar emoji
# and the Arabic textbox placeholder were UTF-8 text mis-decoded through a
# Thai codepage ("๐Ÿ›ก๏ธ" -> 🛡️, "ุงูƒุชุจ ู‡ู†ุง..." -> "اكتب هنا...").
with gr.Blocks(css=CSS) as demo:
    with gr.Row(equal_height=True):
        # Sidebar: purpose, features, usage, and dialect-label legend.
        with gr.Column(scale=1):
            with gr.Group(elem_id="sidebar-box"):
                gr.Markdown("""
                ## 🛡️ AI Watchdog: Arabic Content Safety Analyzer

                **Purpose**  
                Analyze Arabic text for harmful or threatening language.

                **Features**
                - **Hate Speech Classification**
                - **Dialect Detection**
                - **Threat Severity Score**
                - **Recommended Action** (rule-based)

                **How to Use**
                - Type text or
                - Upload a .txt file (one entry per line)

                ### Dialect Abbreviation Guide
                | Abbreviation | Meaning |
                |--------------|---------|
                | **MAGHREB**  | North African dialects |
                | **LEV**      | Levantine dialects |
                | **MSA**      | Modern Standard Arabic |
                | **GLF**      | Gulf dialects |
                | **EGY**      | Egyptian Arabic |
                """)

        # Main area: two tabs wired to analyze() and analyze_file().
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab("Text"):
                    gr.Markdown("### Enter Arabic Text for Analysis")
                    input_text = gr.Textbox(lines=4, placeholder="اكتب هنا...", label="Arabic Text")

                    # Read-only result fields; order matches analyze()'s 6-tuple.
                    out_hate = gr.Textbox(label="Hate Speech Label", interactive=False)
                    out_hate_conf = gr.Textbox(label="Label Confidence", interactive=False)
                    out_dialect = gr.Textbox(label="Dialect", interactive=False)
                    out_dialect_conf = gr.Textbox(label="Dialect Confidence", interactive=False)
                    out_score = gr.Textbox(label="Threat Score", interactive=False)
                    out_action = gr.Textbox(label="Recommended Action", interactive=False)

                    analyze_btn = gr.Button("Analyze", variant="primary")
                    analyze_btn.click(
                        fn=analyze,
                        inputs=input_text,
                        outputs=[out_hate, out_hate_conf, out_dialect, out_dialect_conf, out_score, out_action],
                    )

                with gr.Tab("Upload"):
                    gr.Markdown("### Upload a .txt file (one entry per line)")
                    file_input = gr.File(file_types=[".txt"], label="Upload .txt")
                    # Headers must match the rows built by analyze_file().
                    table = gr.Dataframe(
                        headers=["#", "Text (truncated)", "Hate Label", "Label Conf.", "Dialect",
                                 "Dialect Conf.", "Threat Score", "Recommended Action"],
                        row_count=1, col_count=8, wrap=True, interactive=False
                    )
                    download_csv = gr.File(label="Download Results (CSV)")
                    status = gr.Markdown()

                    run_file = gr.Button("Analyze File", variant="primary")
                    run_file.click(
                        fn=analyze_file,
                        inputs=file_input,
                        outputs=[table, download_csv, status]
                    )

if __name__ == "__main__":
    demo.launch()