File size: 6,762 Bytes
c1997df
 
ba294cc
c1997df
bd35ac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef4e241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba294cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef4e241
cd5d98f
ef4e241
 
 
 
 
9b60cd8
19be099
 
cd5d98f
ef4e241
19be099
ba294cc
e649907
ef4e241
d4cee6d
28faf01
2add840
d4cee6d
 
dc07e95
d4cee6d
ba294cc
358fb0c
ba294cc
 
dc07e95
bd35ac7
a0c2f3c
ba294cc
358fb0c
 
 
 
 
 
 
 
 
09373c9
dc07e95
ba294cc
dc07e95
ba294cc
a0c2f3c
ba294cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0c2f3c
ba294cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358fb0c
19be099
d4cee6d
358fb0c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import gradio as gr
from transformers import pipeline
import csv, tempfile

# ---------- Pipelines ----------
# Hugging Face text-classification pipelines, loaded once at import time
# (module import therefore downloads/loads both models).
# NOTE(review): `return_all_scores` is deprecated in recent transformers
# releases in favour of `top_k` -- confirm the installed version still
# accepts it before upgrading.

# Arabic hate-speech classifier; returns a single top prediction per input
# as {"label": ..., "score": ...} (consumed by analyze() below).
hate = pipeline(
    "text-classification",
    model="hossam87/bert-base-arabic-hate-speech",
    tokenizer="hossam87/bert-base-arabic-hate-speech",
    return_all_scores=False
)

# Arabic written-dialect classifier (labels such as MSA/EGY/GLF/LEV/MAGHREB,
# per the abbreviation guide rendered in the sidebar).
dialect = pipeline(
    "text-classification",
    model="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    tokenizer="IbrahimAmin/marbertv2-arabic-written-dialect-classifier",
    return_all_scores=False
)

# ---------- Inference ----------
def analyze(text: str):
    """Classify Arabic text for hate speech and dialect; derive a threat score.

    Args:
        text: raw Arabic text from the UI textbox (may be empty/whitespace).

    Returns:
        A 6-tuple of strings:
        (hate label, hate confidence, dialect label, dialect confidence,
         threat score, recommended action).
        On empty input or any model failure, the first five slots are ""
        and the last slot carries the message, so the Gradio UI never crashes.
    """
    try:
        if not text or not text.strip():
            return ("", "", "", "", "", "Please enter some Arabic text.")

        # Each pipeline returns a one-element list of {"label", "score"}.
        h = hate(text)[0]
        d = dialect(text)[0]

        hate_label = h.get("label", "")
        hate_conf = float(h.get("score", 0.0))
        dial_label = d.get("label", "")
        dial_conf = float(d.get("score", 0.0))

        # Severity weight per label; unknown labels default to 0.0.
        weights = {
            "Neutral": 0.0,
            "Offensive": 0.5,
            "Sexism": 1.0,
            "Racism": 1.0,
            "Religious Discrimination": 1.0,
        }
        weight = weights.get(hate_label, 0.0)
        score = hate_conf * weight  # confidence-scaled threat score

        # Rule-based recommended action.
        # BUG FIX: the original third branch (`elif score >= 0.49`) was
        # unreachable -- it was only evaluated when the label was Neutral,
        # where the weight (and therefore the score) is always 0.0.
        # "Borderline" now fires when the model says Neutral but with low
        # confidence, i.e. the classifier itself is unsure.
        # (Also repaired mojibake emoji in the action strings.)
        if hate_label != "Neutral" and weight >= 1.0:
            action = "🚨 Immediate Review — Severe content detected. Escalate to moderators."
        elif hate_label != "Neutral":
            action = "⚠️ Potentially Harmful — Contains offensive content. Please review."
        elif hate_conf < 0.5:
            action = "⚠️ Borderline — Review recommended."
        else:
            action = "✅ Safe — No action needed."

        return (hate_label, f"{hate_conf:.2f}", dial_label, f"{dial_conf:.2f}", f"{score:.2f}", action)

    except Exception as e:
        # Keep the UI alive even if a model throws.
        return ("", "", "", "", "", f"Runtime error: {e}")

def analyze_file(file):
    """Batch-analyze an uploaded .txt file, one entry per line.

    Args:
        file: value produced by ``gr.File`` -- either an object exposing a
            ``.name`` path attribute (older Gradio) or a plain filepath
            string (newer Gradio versions), or None if nothing was uploaded.

    Returns:
        (rows, csv_path, status): ``rows`` feeds the results Dataframe,
        ``csv_path`` is a downloadable CSV path (or None on failure) and
        ``status`` is a human-readable message.
    """
    if file is None:
        return [], None, "Please upload a .txt file."

    try:
        # Accept both a filepath string and a file-like object with .name.
        path = file if isinstance(file, str) else file.name

        # Read the upload (utf-8, ignoring undecodable bytes); the context
        # manager guarantees the handle is closed (the original leaked it).
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            lines = [ln.strip() for ln in fh.read().splitlines() if ln.strip()]

        rows = []
        headers = ["#", "Text (truncated)", "Hate Label", "Label Conf.",
                   "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"]

        # Process up to 200 lines to keep the UI responsive.
        for i, ln in enumerate(lines[:200], start=1):
            hate_label, hate_conf, dial_label, dial_conf, score, action = analyze(ln)
            rows.append([
                i,
                (ln[:80] + "…") if len(ln) > 80 else ln,
                hate_label, hate_conf, dial_label, dial_conf, score, action
            ])

        # Write the CSV to a temp file for download.  Close the handle
        # before reopening by name: reopening a still-open
        # NamedTemporaryFile fails on Windows.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        tmp.close()
        with open(tmp.name, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(rows)

        status = f"Processed {len(rows)} lines."
        return rows, tmp.name, status

    except Exception as e:
        return [], None, f"Error reading file: {e}"
        
# ---------- CSS (sidebar only) ----------
# Injected via gr.Blocks(css=...) below; scoped to elem_id="sidebar-box" so
# the default Gradio theme for the main area is left untouched.
CSS = """
/* Only style the sidebar box; leave main area/theme untouched */
#sidebar-box {
  border: 1px solid rgba(255,255,255,0.08);
  border-radius: 10px;
  padding: 20px;
}
"""

# ---------- UI ----------
# Two-column layout: a styled sidebar with usage notes, and a tabbed main
# area (single-text analysis / batch .txt upload).
# BUG FIX: repaired mojibake in user-facing strings -- the sidebar emoji
# and the Arabic textbox placeholder were UTF-8 text mis-decoded through a
# Thai codepage ("๐Ÿ›ก๏ธ" -> 🛡️, "ุงูƒุชุจ ู‡ู†ุง..." -> "اكتب هنا...").
with gr.Blocks(css=CSS) as demo:
    with gr.Row(equal_height=True):
        # Sidebar: purpose, features, usage, and dialect-label legend.
        with gr.Column(scale=1):
            with gr.Group(elem_id="sidebar-box"):
                gr.Markdown("""
                ## 🛡️ AI Watchdog: Arabic Content Safety Analyzer

                **Purpose**  
                Analyze Arabic text for harmful or threatening language.

                **Features**
                - **Hate Speech Classification**
                - **Dialect Detection**
                - **Threat Severity Score**
                - **Recommended Action** (rule-based)

                **How to Use**
                - Type text or
                - Upload a .txt file (one entry per line)

                ### Dialect Abbreviation Guide
                | Abbreviation | Meaning |
                |--------------|---------|
                | **MAGHREB**  | North African dialects |
                | **LEV**      | Levantine dialects |
                | **MSA**      | Modern Standard Arabic |
                | **GLF**      | Gulf dialects |
                | **EGY**      | Egyptian Arabic |
                """)

        # Main area: two tabs wired to analyze() and analyze_file().
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab("Text"):
                    gr.Markdown("### Enter Arabic Text for Analysis")
                    input_text = gr.Textbox(lines=4, placeholder="اكتب هنا...", label="Arabic Text")

                    # Read-only result fields; order matches analyze()'s 6-tuple.
                    out_hate = gr.Textbox(label="Hate Speech Label", interactive=False)
                    out_hate_conf = gr.Textbox(label="Label Confidence", interactive=False)
                    out_dialect = gr.Textbox(label="Dialect", interactive=False)
                    out_dialect_conf = gr.Textbox(label="Dialect Confidence", interactive=False)
                    out_score = gr.Textbox(label="Threat Score", interactive=False)
                    out_action = gr.Textbox(label="Recommended Action", interactive=False)

                    analyze_btn = gr.Button("Analyze", variant="primary")
                    analyze_btn.click(
                        fn=analyze,
                        inputs=input_text,
                        outputs=[out_hate, out_hate_conf, out_dialect, out_dialect_conf, out_score, out_action],
                    )

                with gr.Tab("Upload"):
                    gr.Markdown("### Upload a .txt file (one entry per line)")
                    file_input = gr.File(file_types=[".txt"], label="Upload .txt")
                    # Headers must match the rows built by analyze_file().
                    table = gr.Dataframe(
                        headers=["#", "Text (truncated)", "Hate Label", "Label Conf.", "Dialect",
                                 "Dialect Conf.", "Threat Score", "Recommended Action"],
                        row_count=1, col_count=8, wrap=True, interactive=False
                    )
                    download_csv = gr.File(label="Download Results (CSV)")
                    status = gr.Markdown()

                    run_file = gr.Button("Analyze File", variant="primary")
                    run_file.click(
                        fn=analyze_file,
                        inputs=file_input,
                        outputs=[table, download_csv, status]
                    )

if __name__ == "__main__":
    demo.launch()