import gradio as gr
from transformers import pipeline
import csv, tempfile

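# Hugging Face text-classification pipelines, loaded once at startup: one for
# Arabic hate-speech labelling, one for written-dialect identification.
# return_all_scores=False keeps a single best {label, score} dict per input.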
hate = pipeline(
    "text-classification",
    model="hossam87/bert-base-arabic-hate-speech",
    tokenizer="hossam87/bert-base-arabic-hate-speech",
    return_all_scores=False,
)

dialect = pipeline(
    "text-classification",
    model="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    tokenizer="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    return_all_scores=False,
)

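# Analyze a single text. Returns a 6-tuple of strings matching the Gradio
# outputs: (hate label, hate confidence, dialect, dialect confidence,
# threat score, recommended action).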
def analyze(text: str):
    try:
        if not text or not text.strip():
            return ("", "", "", "", "", "Please enter some Arabic text.")

        h = hate(text)[0]
        d = dialect(text)[0]

        hate_label = h.get("label", "")
        hate_conf = float(h.get("score", 0.0))
        dial_label = d.get("label", "")
        dial_conf = float(d.get("score", 0.0))

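        # Severity weight per hate-speech label (assumes this is the label set
        # the model emits); the threat score is the model's confidence scaled
        # by the weight of the predicted label.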
        weights = {
            "Neutral": 0.0,
            "Offensive": 0.5,
            "Sexism": 1.0,
            "Racism": 1.0,
            "Religious Discrimination": 1.0,
        }
        score = hate_conf * weights.get(hate_label, 0.0)

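        # Rule-based escalation: fully-weighted labels trigger immediate
        # review, any other non-neutral label asks for review, and the
        # numeric score is checked as a final threshold.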
        if hate_label != "Neutral" and weights.get(hate_label, 0.0) >= 1.0:
            action = "🚨 Immediate Review — Severe content detected. Escalate to moderators."
        elif hate_label != "Neutral":
            action = "⚠️ Potentially Harmful — Contains offensive content. Please review."
        elif score >= 0.49:
            action = "⚠️ Borderline — Review recommended."
        else:
            action = "✅ Safe — No action needed."

        return (hate_label, f"{hate_conf:.2f}", dial_label, f"{dial_conf:.2f}", f"{score:.2f}", action)

    except Exception as e:
        return ("", "", "", "", "", f"Runtime error: {e}")


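# Analyze an uploaded .txt file line by line. Returns the table rows, the path
# of a CSV containing the same results, and a status message.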
def analyze_file(file):
    if file is None:
        return [], None, "Please upload a .txt file."

    try:
        # gr.File may hand back a path string or a file-like object with a
        # .name attribute, depending on the Gradio version; accept both.
        path = file if isinstance(file, str) else file.name
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            text = fh.read()
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

        rows = []
        headers = ["#", "Text (truncated)", "Hate Label", "Label Conf.",
                   "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"]

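        # Classify at most the first 200 non-empty lines per upload.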
        for i, ln in enumerate(lines[:200], start=1):
            hate_label, hate_conf, dial_label, dial_conf, score, action = analyze(ln)
            rows.append([
                i,
                (ln[:80] + "…") if len(ln) > 80 else ln,
                hate_label, hate_conf, dial_label, dial_conf, score, action
            ])

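        # Write the results to a temporary CSV that Gradio can serve for
        # download; delete=False keeps the file on disk after the handle closes.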
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        tmp.close()  # release the handle so the path can be reopened for writing
        with open(tmp.name, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(rows)

        status = f"Processed {len(rows)} lines."
        return rows, tmp.name, status

    except Exception as e:
        return [], None, f"Error reading file: {e}"


CSS = """
/* Only style the sidebar box; leave main area/theme untouched */
#sidebar-box {
    border: 1px solid rgba(255,255,255,0.08);
    border-radius: 10px;
    padding: 20px;
}
"""

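# UI: a sidebar with usage notes and a dialect legend, plus two tabs for
# single-text analysis and batch .txt upload.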
with gr.Blocks(css=CSS) as demo:
    with gr.Row(equal_height=True):

        with gr.Column(scale=1):
            with gr.Group(elem_id="sidebar-box"):
                gr.Markdown("""
## 🛡️ AI Watchdog: Arabic Content Safety Analyzer

**Purpose**
Analyze Arabic text for harmful or threatening language.

**Features**
- **Hate Speech Classification**
- **Dialect Detection**
- **Threat Severity Score**
- **Recommended Action** (rule-based)

**How to Use**
- Type text, or
- Upload a .txt file (one entry per line)

### Dialect Abbreviation Guide
| Abbreviation | Meaning |
|--------------|---------|
| **MAGHREB** | North African dialects |
| **LEV** | Levantine dialects |
| **MSA** | Modern Standard Arabic |
| **GLF** | Gulf dialects |
| **EGY** | Egyptian Arabic |
""")

        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab("Text"):
                    gr.Markdown("### Enter Arabic Text for Analysis")
                    input_text = gr.Textbox(lines=4, placeholder="اكتب هنا...", label="Arabic Text")

                    out_hate = gr.Textbox(label="Hate Speech Label", interactive=False)
                    out_hate_conf = gr.Textbox(label="Label Confidence", interactive=False)
                    out_dialect = gr.Textbox(label="Dialect", interactive=False)
                    out_dialect_conf = gr.Textbox(label="Dialect Confidence", interactive=False)
                    out_score = gr.Textbox(label="Threat Score", interactive=False)
                    out_action = gr.Textbox(label="Recommended Action", interactive=False)

                    analyze_btn = gr.Button("Analyze", variant="primary")
                    analyze_btn.click(
                        fn=analyze,
                        inputs=input_text,
                        outputs=[out_hate, out_hate_conf, out_dialect, out_dialect_conf, out_score, out_action],
                    )

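                # Batch tab: upload a .txt file, preview results in a table,
                # and download the same rows as CSV.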
                with gr.Tab("Upload"):
                    gr.Markdown("### Upload a .txt file (one entry per line)")
                    file_input = gr.File(file_types=[".txt"], label="Upload .txt")
                    table = gr.Dataframe(
                        headers=["#", "Text (truncated)", "Hate Label", "Label Conf.", "Dialect",
                                 "Dialect Conf.", "Threat Score", "Recommended Action"],
                        row_count=1, col_count=8, wrap=True, interactive=False
                    )
                    download_csv = gr.File(label="Download Results (CSV)")
                    status = gr.Markdown()

                    run_file = gr.Button("Analyze File", variant="primary")
                    run_file.click(
                        fn=analyze_file,
                        inputs=file_input,
                        outputs=[table, download_csv, status]
                    )


if __name__ == "__main__":
    demo.launch()