import csv
import tempfile

import gradio as gr
from transformers import pipeline

# ---------- Pipelines ----------
hate = pipeline(
    "text-classification",
    model="hossam87/bert-base-arabic-hate-speech",
    tokenizer="hossam87/bert-base-arabic-hate-speech",
    return_all_scores=False,
)

dialect = pipeline(
    "text-classification",
    model="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    tokenizer="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    return_all_scores=False,
)

# ---------- Inference ----------
def analyze(text: str):
    try:
        if not text or not text.strip():
            return ("", "", "", "", "", "Please enter some Arabic text.")

        h = hate(text)[0]
        d = dialect(text)[0]

        hate_label = h.get("label", "")
        hate_conf = float(h.get("score", 0.0))
        dial_label = d.get("label", "")
        dial_conf = float(d.get("score", 0.0))

        # Severity weights per hate-speech label; the threat score is the
        # label confidence scaled by its weight.
        weights = {
            "Neutral": 0.0,
            "Offensive": 0.5,
            "Sexism": 1.0,
            "Racism": 1.0,
            "Religious Discrimination": 1.0,
        }
        score = hate_conf * weights.get(hate_label, 0.0)

        if hate_label != "Neutral" and weights.get(hate_label, 0.0) >= 1.0:
            action = "🚨 Immediate Review — Severe content detected. Escalate to moderators."
        elif hate_label != "Neutral":
            action = "⚠️ Potentially Harmful — Contains offensive content. Please review."
        elif score >= 0.49:
            action = "⚠️ Borderline — Review recommended."
        else:
            action = "✅ Safe — No action needed."

        return (
            hate_label,
            f"{hate_conf:.2f}",
            dial_label,
            f"{dial_conf:.2f}",
            f"{score:.2f}",
            action,
        )
    except Exception as e:
        # Keep the UI alive even if a model throws.
        return ("", "", "", "", "", f"Runtime error: {e}")


def analyze_file(file):
    if file is None:
        return [], None, "Please upload a .txt file."
    try:
        # gr.File may pass a path string or a tempfile wrapper with a .name
        # attribute, depending on the Gradio version.
        path = file if isinstance(file, str) else file.name

        # Read the uploaded file as UTF-8, ignoring undecodable bytes.
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            text = f.read()
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

        rows = []
        headers = ["#", "Text (truncated)", "Hate Label", "Label Conf.",
                   "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"]

        # Process up to 200 lines to keep the UI responsive.
        for i, ln in enumerate(lines[:200], start=1):
            hate_label, hate_conf, dial_label, dial_conf, score, action = analyze(ln)
            rows.append([
                i,
                (ln[:80] + "…") if len(ln) > 80 else ln,
                hate_label, hate_conf,
                dial_label, dial_conf,
                score, action,
            ])

        # Write the results to a temporary CSV for download.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        tmp.close()  # close the handle so the path can be reopened on any platform
        with open(tmp.name, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(rows)

        status = f"Processed {len(rows)} lines."
        return rows, tmp.name, status
    except Exception as e:
        return [], None, f"Error reading file: {e}"


# ---------- CSS (sidebar only) ----------
CSS = """
/* Only style the sidebar box; leave main area/theme untouched */
#sidebar-box {
    border: 1px solid rgba(255,255,255,0.08);
    border-radius: 10px;
    padding: 20px;
}
"""

# ---------- UI ----------
with gr.Blocks(css=CSS) as demo:
    with gr.Row(equal_height=True):
        # Sidebar
        with gr.Column(scale=1):
            with gr.Group(elem_id="sidebar-box"):
                gr.Markdown("""
## 🛡️ AI Watchdog: Arabic Content Safety Analyzer

**Purpose**
Analyze Arabic text for harmful or threatening language.

**Features**
- **Hate Speech Classification**
- **Dialect Detection**
- **Threat Severity Score**
- **Recommended Action** (rule-based)

**How to Use**
- Type text, or
- Upload a .txt file (one entry per line)

### Dialect Abbreviation Guide

| Abbreviation | Meaning |
|--------------|---------|
| **MAGHREB** | North African dialects |
| **LEV** | Levantine dialects |
| **MSA** | Modern Standard Arabic |
| **GLF** | Gulf dialects |
| **EGY** | Egyptian Arabic |
""")

        # Main area
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab("Text"):
                    gr.Markdown("### Enter Arabic Text for Analysis")
                    input_text = gr.Textbox(lines=4, placeholder="اكتب هنا...", label="Arabic Text")

                    out_hate = gr.Textbox(label="Hate Speech Label", interactive=False)
                    out_hate_conf = gr.Textbox(label="Label Confidence", interactive=False)
                    out_dialect = gr.Textbox(label="Dialect", interactive=False)
                    out_dialect_conf = gr.Textbox(label="Dialect Confidence", interactive=False)
                    out_score = gr.Textbox(label="Threat Score", interactive=False)
                    out_action = gr.Textbox(label="Recommended Action", interactive=False)

                    analyze_btn = gr.Button("Analyze", variant="primary")
                    analyze_btn.click(
                        fn=analyze,
                        inputs=input_text,
                        outputs=[out_hate, out_hate_conf, out_dialect,
                                 out_dialect_conf, out_score, out_action],
                    )

                with gr.Tab("Upload"):
                    gr.Markdown("### Upload a .txt file (one entry per line)")
                    file_input = gr.File(file_types=[".txt"], label="Upload .txt")
                    table = gr.Dataframe(
                        headers=["#", "Text (truncated)", "Hate Label", "Label Conf.",
                                 "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"],
                        row_count=1,
                        col_count=8,
                        wrap=True,
                        interactive=False,
                    )
                    download_csv = gr.File(label="Download Results (CSV)")
                    status = gr.Markdown()

                    run_file = gr.Button("Analyze File", variant="primary")
                    run_file.click(
                        fn=analyze_file,
                        inputs=file_input,
                        outputs=[table, download_csv, status],
                    )

if __name__ == "__main__":
    demo.launch()