Mubarak2507's picture
Update app.py
358fb0c verified
import gradio as gr
from transformers import pipeline
import csv, tempfile
# ---------- Pipelines ----------
# Hate-speech classifier. One checkpoint supplies both the model and the
# tokenizer; analyze() reads a single label/score dict per input from it.
_HATE_CHECKPOINT = "hossam87/bert-base-arabic-hate-speech"
hate = pipeline(
    task="text-classification",
    model=_HATE_CHECKPOINT,
    tokenizer=_HATE_CHECKPOINT,
    return_all_scores=False,
)
# Written-dialect classifier. One checkpoint supplies both the model and the
# tokenizer; analyze() reads a single label/score dict per input from it.
_DIALECT_CHECKPOINT = "IbrahimAmin/marbertv2-arabic-written-dialect-classifier"
dialect = pipeline(
    task="text-classification",
    model=_DIALECT_CHECKPOINT,
    tokenizer=_DIALECT_CHECKPOINT,
    return_all_scores=False,
)
# ---------- Inference ----------
# Severity weight applied to the classifier confidence for each hate-speech
# label. Labels that are not listed here weigh 0.0.
_SEVERITY_WEIGHTS = {
    "Neutral": 0.0,
    "Offensive": 0.5,
    "Sexism": 1.0,
    "Racism": 1.0,
    "Religious Discrimination": 1.0,
}


def analyze(text: str):
    """Classify one piece of text and recommend a moderation action.

    Runs the module-level ``hate`` and ``dialect`` pipelines on *text*, then
    derives a threat score (label confidence multiplied by the label's
    severity weight) and a rule-based recommendation.

    Args:
        text: The text to analyze. ``None``, empty, and whitespace-only
            input is answered with a prompt instead of being classified.

    Returns:
        A 6-tuple ``(hate_label, hate_conf, dialect_label, dialect_conf,
        threat_score, action)``. The confidences and the score are strings
        formatted with two decimals. For empty input, or when anything
        raises, the first five items are ``""`` and the last item carries
        the message for the user.
    """
    try:
        if not text or not text.strip():
            return ("", "", "", "", "", "Please enter some Arabic text.")

        # Ask the tokenizer to truncate over-long inputs so they are still
        # classified instead of failing inside the model.
        top_hate = hate(text, truncation=True)[0]
        top_dialect = dialect(text, truncation=True)[0]

        hate_label = top_hate.get("label", "")
        hate_conf = float(top_hate.get("score", 0.0))
        dial_label = top_dialect.get("label", "")
        dial_conf = float(top_dialect.get("score", 0.0))

        weight = _SEVERITY_WEIGHTS.get(hate_label, 0.0)
        score = hate_conf * weight

        # "Neutral" always scores 0, so the former score-based "Borderline"
        # branch could never fire; the decision depends only on the label
        # and its weight.
        if hate_label == "Neutral":
            action = "✅ Safe — No action needed."
        elif weight >= 1.0:
            action = "🚨 Immediate Review — Severe content detected. Escalate to moderators."
        else:
            action = "⚠️ Potentially Harmful — Contains offensive content. Please review."

        return (
            hate_label,
            f"{hate_conf:.2f}",
            dial_label,
            f"{dial_conf:.2f}",
            f"{score:.2f}",
            action,
        )
    except Exception as e:
        # Deliberate catch-all at the UI boundary: keep the app alive even
        # if a model throws, and show the error to the user.
        return ("", "", "", "", "", f"Runtime error: {e}")
def analyze_file(file, max_lines: int = 200):
    """Analyze an uploaded text file line by line and export the results.

    Args:
        file: The uploaded file, given either as a filesystem path (``str``)
            or as an object exposing that path through ``.name``. ``None``
            means nothing was uploaded.
        max_lines: Maximum number of non-blank lines to analyze; any lines
            beyond that are ignored to keep the UI responsive.

    Returns:
        A tuple ``(rows, csv_path, status)``: the table rows (one list per
        analyzed line), the path of a temporary CSV file holding a header
        row plus the same rows, and a status message. When no file was given
        or anything fails, ``rows`` is ``[]`` and ``csv_path`` is ``None``.
    """
    if file is None:
        return [], None, "Please upload a .txt file."
    try:
        # Accept both a plain path string and a file-like wrapper with .name.
        path = file if isinstance(file, str) else file.name

        # Decode as UTF-8; undecodable bytes are dropped instead of aborting.
        with open(path, "r", encoding="utf-8", errors="ignore") as src:
            text = src.read()
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

        headers = ["#", "Text (truncated)", "Hate Label", "Label Conf.",
                   "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"]
        rows = []
        for i, ln in enumerate(lines[:max_lines], start=1):
            hate_label, hate_conf, dial_label, dial_conf, score, action = analyze(ln)
            preview = (ln[:80] + "…") if len(ln) > 80 else ln
            rows.append([
                i, preview,
                hate_label, hate_conf, dial_label, dial_conf, score, action,
            ])

        # Write the CSV through the temp-file handle itself so it is closed
        # deterministically; delete=False keeps the file for the download.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".csv", encoding="utf-8", newline=""
        ) as tmp:
            writer = csv.writer(tmp)
            writer.writerow(headers)
            writer.writerows(rows)

        status = f"Processed {len(rows)} lines."
        return rows, tmp.name, status
    except Exception as e:
        # Catch-all at the UI boundary: report the failure instead of raising.
        return [], None, f"Error reading file: {e}"
# ---------- CSS (sidebar only) ----------
# Custom stylesheet passed to gr.Blocks(css=...). Its single rule targets the
# element whose elem_id is "sidebar-box" (the sidebar group in the layout).
CSS = """
/* Only style the sidebar box; leave main area/theme untouched */
#sidebar-box {
border: 1px solid rgba(255,255,255,0.08);
border-radius: 10px;
padding: 20px;
}
"""
# ---------- UI ----------
# Sidebar text, kept out of the layout code so the nesting below stays readable.
_SIDEBAR_MARKDOWN = """
## 🛡️ AI Watchdog: Arabic Content Safety Analyzer
**Purpose**
Analyze Arabic text for harmful or threatening language.
**Features**
- **Hate Speech Classification**
- **Dialect Detection**
- **Threat Severity Score**
- **Recommended Action** (rule-based)
**How to Use**
- Type text or
- Upload a .txt file (one entry per line)
### Dialect Abbreviation Guide
| Abbreviation | Meaning |
|--------------|---------|
| **MAGHREB** | North African dialects |
| **LEV** | Levantine dialects |
| **MSA** | Modern Standard Arabic |
| **GLF** | Gulf dialects |
| **EGY** | Egyptian Arabic |
"""

# Column titles of the batch-results table on the "Upload" tab.
_TABLE_HEADERS = [
    "#",
    "Text (truncated)",
    "Hate Label",
    "Label Conf.",
    "Dialect",
    "Dialect Conf.",
    "Threat Score",
    "Recommended Action",
]

with gr.Blocks(css=CSS) as demo:
    with gr.Row(equal_height=True):
        # Left column: static description and dialect legend.
        with gr.Column(scale=1), gr.Group(elem_id="sidebar-box"):
            gr.Markdown(_SIDEBAR_MARKDOWN)

        # Right column: one tab per input mode.
        with gr.Column(scale=3), gr.Tabs():
            with gr.Tab("Text"):
                gr.Markdown("### Enter Arabic Text for Analysis")
                input_text = gr.Textbox(
                    label="Arabic Text",
                    placeholder="اكتب هنا...",
                    lines=4,
                )

                # Read-only result fields, in the order analyze() returns them.
                out_hate = gr.Textbox(interactive=False, label="Hate Speech Label")
                out_hate_conf = gr.Textbox(interactive=False, label="Label Confidence")
                out_dialect = gr.Textbox(interactive=False, label="Dialect")
                out_dialect_conf = gr.Textbox(interactive=False, label="Dialect Confidence")
                out_score = gr.Textbox(interactive=False, label="Threat Score")
                out_action = gr.Textbox(interactive=False, label="Recommended Action")

                analyze_btn = gr.Button("Analyze", variant="primary")
                analyze_btn.click(
                    fn=analyze,
                    inputs=input_text,
                    outputs=[
                        out_hate,
                        out_hate_conf,
                        out_dialect,
                        out_dialect_conf,
                        out_score,
                        out_action,
                    ],
                )

            with gr.Tab("Upload"):
                gr.Markdown("### Upload a .txt file (one entry per line)")
                file_input = gr.File(label="Upload .txt", file_types=[".txt"])
                table = gr.Dataframe(
                    headers=_TABLE_HEADERS,
                    row_count=1,
                    col_count=8,
                    wrap=True,
                    interactive=False,
                )
                download_csv = gr.File(label="Download Results (CSV)")
                status = gr.Markdown()

                run_file = gr.Button("Analyze File", variant="primary")
                run_file.click(
                    fn=analyze_file,
                    inputs=file_input,
                    outputs=[table, download_csv, status],
                )
# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()