import gradio as gr
from transformers import pipeline
import csv, tempfile

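# Hugging Face text-classification pipelines, loaded once at startup: one for
# Arabic hate-speech labelling, one for written-dialect identification.
# return_all_scores=False keeps a single best {label, score} dict per input.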
hate = pipeline(
    "text-classification",
    model="hossam87/bert-base-arabic-hate-speech",
    tokenizer="hossam87/bert-base-arabic-hate-speech",
    return_all_scores=False,
)

dialect = pipeline(
    "text-classification",
    model="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    tokenizer="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    return_all_scores=False,
)

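# Analyze a single text. Returns a 6-tuple of strings matching the Gradio
# outputs: (hate label, hate confidence, dialect, dialect confidence,
# threat score, recommended action).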
def analyze(text: str):
    try:
        if not text or not text.strip():
            return ("", "", "", "", "", "Please enter some Arabic text.")

        h = hate(text)[0]
        d = dialect(text)[0]

        hate_label = h.get("label", "")
        hate_conf = float(h.get("score", 0.0))
        dial_label = d.get("label", "")
        dial_conf = float(d.get("score", 0.0))

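        # Severity weight per hate-speech label (assumes this is the label set
        # the model emits); the threat score is the model's confidence scaled
        # by the weight of the predicted label.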
        weights = {
            "Neutral": 0.0,
            "Offensive": 0.5,
            "Sexism": 1.0,
            "Racism": 1.0,
            "Religious Discrimination": 1.0,
        }
        score = hate_conf * weights.get(hate_label, 0.0)

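        # Rule-based escalation: fully-weighted labels trigger immediate
        # review, any other non-neutral label asks for review, and the
        # numeric score is checked as a final threshold.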
        if hate_label != "Neutral" and weights.get(hate_label, 0.0) >= 1.0:
            action = "🚨 Immediate Review — Severe content detected. Escalate to moderators."
        elif hate_label != "Neutral":
            action = "⚠️ Potentially Harmful — Contains offensive content. Please review."
        elif score >= 0.49:
            action = "⚠️ Borderline — Review recommended."
        else:
            action = "✅ Safe — No action needed."

        return (hate_label, f"{hate_conf:.2f}", dial_label, f"{dial_conf:.2f}", f"{score:.2f}", action)

    except Exception as e:
        return ("", "", "", "", "", f"Runtime error: {e}")


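# Analyze an uploaded .txt file line by line. Returns the table rows, the path
# of a CSV containing the same results, and a status message.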
def analyze_file(file):
    if file is None:
        return [], None, "Please upload a .txt file."

    try:
        # gr.File may hand back a path string or a file-like object with a
        # .name attribute, depending on the Gradio version; accept both.
        path = file if isinstance(file, str) else file.name
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            text = fh.read()
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

        rows = []
        headers = ["#", "Text (truncated)", "Hate Label", "Label Conf.",
                   "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"]

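        # Classify at most the first 200 non-empty lines per upload.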
        for i, ln in enumerate(lines[:200], start=1):
            hate_label, hate_conf, dial_label, dial_conf, score, action = analyze(ln)
            rows.append([
                i,
                (ln[:80] + "…") if len(ln) > 80 else ln,
                hate_label, hate_conf, dial_label, dial_conf, score, action
            ])

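        # Write the results to a temporary CSV that Gradio can serve for
        # download; delete=False keeps the file on disk after the handle closes.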
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        tmp.close()  # release the handle so the path can be reopened for writing
        with open(tmp.name, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(rows)

        status = f"Processed {len(rows)} lines."
        return rows, tmp.name, status

    except Exception as e:
        return [], None, f"Error reading file: {e}"


CSS = """
/* Only style the sidebar box; leave main area/theme untouched */
#sidebar-box {
    border: 1px solid rgba(255,255,255,0.08);
    border-radius: 10px;
    padding: 20px;
}
"""

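# UI: a sidebar with usage notes and a dialect legend, plus two tabs for
# single-text analysis and batch .txt upload.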
with gr.Blocks(css=CSS) as demo:
    with gr.Row(equal_height=True):

        with gr.Column(scale=1):
            with gr.Group(elem_id="sidebar-box"):
                gr.Markdown("""
## 🛡️ AI Watchdog: Arabic Content Safety Analyzer

**Purpose**
Analyze Arabic text for harmful or threatening language.

**Features**
- **Hate Speech Classification**
- **Dialect Detection**
- **Threat Severity Score**
- **Recommended Action** (rule-based)

**How to Use**
- Type text, or
- Upload a .txt file (one entry per line)

### Dialect Abbreviation Guide
| Abbreviation | Meaning |
|--------------|---------|
| **MAGHREB** | North African dialects |
| **LEV** | Levantine dialects |
| **MSA** | Modern Standard Arabic |
| **GLF** | Gulf dialects |
| **EGY** | Egyptian Arabic |
""")

        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab("Text"):
                    gr.Markdown("### Enter Arabic Text for Analysis")
                    input_text = gr.Textbox(lines=4, placeholder="اكتب هنا...", label="Arabic Text")

                    out_hate = gr.Textbox(label="Hate Speech Label", interactive=False)
                    out_hate_conf = gr.Textbox(label="Label Confidence", interactive=False)
                    out_dialect = gr.Textbox(label="Dialect", interactive=False)
                    out_dialect_conf = gr.Textbox(label="Dialect Confidence", interactive=False)
                    out_score = gr.Textbox(label="Threat Score", interactive=False)
                    out_action = gr.Textbox(label="Recommended Action", interactive=False)

                    analyze_btn = gr.Button("Analyze", variant="primary")
                    analyze_btn.click(
                        fn=analyze,
                        inputs=input_text,
                        outputs=[out_hate, out_hate_conf, out_dialect, out_dialect_conf, out_score, out_action],
                    )

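                # Batch tab: upload a .txt file, preview results in a table,
                # and download the same rows as CSV.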
                with gr.Tab("Upload"):
                    gr.Markdown("### Upload a .txt file (one entry per line)")
                    file_input = gr.File(file_types=[".txt"], label="Upload .txt")
                    table = gr.Dataframe(
                        headers=["#", "Text (truncated)", "Hate Label", "Label Conf.", "Dialect",
                                 "Dialect Conf.", "Threat Score", "Recommended Action"],
                        row_count=1, col_count=8, wrap=True, interactive=False
                    )
                    download_csv = gr.File(label="Download Results (CSV)")
                    status = gr.Markdown()

                    run_file = gr.Button("Analyze File", variant="primary")
                    run_file.click(
                        fn=analyze_file,
                        inputs=file_input,
                        outputs=[table, download_csv, status]
                    )


if __name__ == "__main__":
    demo.launch()