Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,104 +5,183 @@ import numpy as np
|
|
5 |
import json
|
6 |
from datetime import datetime
|
7 |
import logging
|
|
|
8 |
|
9 |
# Set up logging
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
13 |
-
class
|
14 |
def __init__(self):
|
15 |
-
self.
|
16 |
-
self.
|
17 |
-
self.
|
18 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
def
|
21 |
-
"""Load
|
22 |
try:
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
model_path = "./model"
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
logger.info("✅ Tokenizer loaded successfully")
|
31 |
-
|
32 |
-
# Load model
|
33 |
-
self.model = AutoModelForSequenceClassification.from_pretrained(
|
34 |
model_path,
|
35 |
-
torch_dtype=torch.float32
|
36 |
-
device_map="auto"
|
37 |
)
|
38 |
-
logger.info("✅ DistilBERT model loaded successfully")
|
39 |
|
40 |
-
|
41 |
-
self.classifier = pipeline(
|
42 |
"text-classification",
|
43 |
-
model=
|
44 |
-
tokenizer=
|
45 |
return_all_scores=True,
|
46 |
device=0 if torch.cuda.is_available() else -1
|
47 |
)
|
|
|
48 |
|
49 |
-
|
50 |
-
logger.
|
51 |
-
logger.info(
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
except Exception as e:
|
55 |
-
logger.error(f"❌
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
#
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
)
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
69 |
|
70 |
-
def
|
71 |
-
"""
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
def detect_hate_speech(self, text):
|
83 |
-
"""
|
84 |
if not text or not text.strip():
|
85 |
return {
|
86 |
"status": "❌ Please enter some text to analyze.",
|
87 |
"prediction": "No input",
|
88 |
"confidence": 0.0,
|
89 |
"all_scores": {},
|
90 |
-
"risk_level": "Unknown"
|
|
|
91 |
}
|
92 |
|
93 |
try:
|
94 |
-
|
95 |
-
processed_text = self.preprocess_text(text)
|
96 |
|
97 |
-
# Get predictions
|
98 |
-
results = self.classifier(processed_text)
|
99 |
-
|
100 |
-
# Handle different output formats
|
101 |
if isinstance(results, list) and len(results) > 0:
|
102 |
if isinstance(results[0], list):
|
103 |
results = results[0]
|
104 |
|
105 |
-
# Parse results
|
106 |
all_scores = {}
|
107 |
max_score = 0
|
108 |
predicted_label = "UNKNOWN"
|
@@ -122,7 +201,6 @@ class DistilBERTHateSpeechDetector:
|
|
122 |
|
123 |
# Determine hate speech status
|
124 |
hate_keywords = ["HATE", "TOXIC", "NEGATIVE", "HARMFUL", "1", "LABEL_1"]
|
125 |
-
clean_keywords = ["CLEAN", "NORMAL", "POSITIVE", "SAFE", "0", "LABEL_0"]
|
126 |
|
127 |
is_hate_speech = False
|
128 |
risk_level = "Low"
|
@@ -153,274 +231,553 @@ class DistilBERTHateSpeechDetector:
|
|
153 |
}
|
154 |
|
155 |
except Exception as e:
|
156 |
-
logger.error(f"
|
157 |
return {
|
158 |
-
"status": f"❌
|
159 |
"prediction": "Error",
|
160 |
"confidence": 0.0,
|
161 |
"all_scores": {},
|
162 |
-
"risk_level": "Unknown"
|
|
|
163 |
}
|
164 |
|
165 |
-
def
|
166 |
-
"""
|
167 |
-
if not
|
168 |
-
return "
|
169 |
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
"🎯 **Focus Shift**: Instead of focusing on differences that divide, consider highlighting shared values or common ground.",
|
184 |
-
"🔄 **Reframe Opportunity**: How might you express this same sentiment in a way that brings people together rather than apart?"
|
185 |
-
]
|
186 |
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
|
|
191 |
else:
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
import random
|
199 |
-
return random.choice(
|
200 |
|
201 |
-
def
|
202 |
-
"""
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
214 |
-
# Initialize the
|
215 |
-
logger.info("
|
216 |
-
|
217 |
|
218 |
-
def
|
219 |
-
"""Main analysis function
|
220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
|
222 |
-
#
|
223 |
-
|
224 |
|
225 |
-
#
|
226 |
-
|
|
|
|
|
227 |
|
228 |
-
#
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
|
|
|
|
|
|
|
|
234 |
|
235 |
-
#
|
236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
-
return
|
239 |
|
240 |
-
def
|
241 |
-
"""
|
242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
|
244 |
# Create the Gradio interface
|
245 |
with gr.Blocks(
|
246 |
-
title="
|
247 |
theme=gr.themes.Soft(),
|
248 |
css="""
|
249 |
.gradio-container {
|
250 |
max-width: 1400px !important;
|
251 |
}
|
252 |
-
.
|
|
|
253 |
padding: 1rem;
|
254 |
border-radius: 8px;
|
255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
}
|
257 |
"""
|
258 |
) as demo:
|
259 |
|
260 |
gr.Markdown("""
|
261 |
-
#
|
|
|
|
|
262 |
|
263 |
-
**
|
|
|
|
|
|
|
264 |
|
265 |
-
|
266 |
-
📚 **Educational Counter-Narratives** - AI-generated constructive responses
|
267 |
-
⚡ **Real-time Processing** - Fast analysis with detailed confidence scores
|
268 |
-
🎯 **Multi-level Risk Assessment** - Nuanced understanding of content severity
|
269 |
""")
|
270 |
|
271 |
-
with gr.Tab("
|
272 |
with gr.Row():
|
273 |
with gr.Column(scale=2):
|
274 |
text_input = gr.Textbox(
|
275 |
-
label="Enter text
|
276 |
-
placeholder="
|
277 |
lines=5,
|
278 |
max_lines=15
|
279 |
)
|
280 |
|
281 |
with gr.Row():
|
282 |
-
analyze_btn = gr.Button("
|
283 |
clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
|
|
|
284 |
|
285 |
gr.Examples(
|
286 |
examples=[
|
287 |
-
["This is a wonderful day to learn
|
288 |
-
["I
|
289 |
-
["
|
290 |
-
["Thank you for sharing your
|
291 |
-
["
|
292 |
-
["I
|
293 |
],
|
294 |
inputs=text_input,
|
295 |
-
label="📝 Try these
|
296 |
)
|
297 |
|
298 |
with gr.Row():
|
299 |
-
with gr.Column(
|
300 |
-
|
301 |
-
label="🎯 Detection Result",
|
302 |
interactive=False,
|
303 |
lines=3
|
304 |
)
|
305 |
|
306 |
-
|
307 |
-
label="
|
308 |
interactive=False,
|
309 |
-
lines=
|
|
|
310 |
)
|
311 |
|
312 |
-
with gr.Column(
|
313 |
-
|
314 |
-
label="
|
315 |
interactive=False,
|
316 |
-
lines=
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
)
|
318 |
|
319 |
with gr.Row():
|
320 |
-
|
321 |
-
label="📊
|
322 |
visible=True
|
323 |
)
|
324 |
|
325 |
-
with gr.Tab("
|
326 |
with gr.Row():
|
327 |
with gr.Column():
|
328 |
-
gr.Markdown("
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
|
334 |
with gr.Column():
|
335 |
gr.Markdown("""
|
336 |
-
##
|
|
|
|
|
337 |
|
338 |
-
|
339 |
-
-
|
340 |
-
-
|
341 |
-
-
|
342 |
-
-
|
|
|
343 |
|
344 |
-
|
345 |
-
-
|
346 |
-
-
|
347 |
-
-
|
|
|
348 |
""")
|
349 |
-
|
350 |
-
|
|
|
|
|
|
|
|
|
|
|
351 |
gr.Markdown("""
|
352 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
|
354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
|
362 |
-
###
|
363 |
```
|
364 |
-
Input Text
|
|
|
|
|
|
|
|
|
|
|
|
|
365 |
```
|
366 |
|
367 |
-
###
|
368 |
-
|
369 |
-
|
370 |
-
-
|
371 |
-
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
**
|
376 |
-
-
|
377 |
-
-
|
378 |
-
-
|
379 |
-
-
|
380 |
-
|
381 |
-
**
|
382 |
-
-
|
383 |
-
-
|
384 |
-
-
|
385 |
-
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
-
|
390 |
-
-
|
391 |
-
-
|
392 |
-
-
|
393 |
-
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
- **
|
399 |
-
- **
|
400 |
-
- **
|
401 |
-
- **
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
-
|
408 |
-
|
409 |
-
|
410 |
-
-
|
411 |
-
-
|
|
|
|
|
412 |
""")
|
413 |
|
414 |
# Event handlers
|
415 |
analyze_btn.click(
|
416 |
-
fn=
|
417 |
inputs=text_input,
|
418 |
-
outputs=[
|
419 |
)
|
420 |
|
421 |
clear_btn.click(
|
422 |
fn=lambda: ("", "", "", "", {}),
|
423 |
-
outputs=[text_input,
|
|
|
|
|
|
|
|
|
|
|
424 |
)
|
425 |
|
426 |
# Launch configuration
|
|
|
5 |
import json
|
6 |
from datetime import datetime
|
7 |
import logging
|
8 |
+
import os
|
9 |
|
10 |
# Set up logging
|
11 |
logging.basicConfig(level=logging.INFO)
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
14 |
+
class PromptBasedMultiAgentSystem:
|
15 |
def __init__(self):
    """Set up empty agent slots, load both prompt configurations, start the agents."""
    # Every agent handle starts as None; the setup_* methods assign the real pipelines.
    self.detection_agent = self.counter_speech_agent = None
    self.moderation_agent = self.sentiment_agent = None

    # Prompt configurations come from JSON files (load_prompts supplies a fallback).
    counter_speech_file = "counter_speech_prompts.json"
    moderation_file = "moderation_prompts.json"
    self.counter_speech_prompts = self.load_prompts(counter_speech_file)
    self.moderation_prompts = self.load_prompts(moderation_file)

    self.initialize_agents()
|
26 |
|
27 |
+
def load_prompts(self, filename):
    """Load a prompt configuration from a JSON file, with a built-in fallback.

    Args:
        filename: Path of the JSON prompt file to read.

    Returns:
        The parsed JSON object when the file can be read and its top level is
        a JSON object (dict). In every other case (file missing, unreadable,
        malformed, or holding a non-object value) the result of
        ``self.get_default_prompts(filename)``.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            prompts = json.load(f)
    except FileNotFoundError:
        logger.warning(f"Prompt file {filename} not found, using built-in prompts")
        return self.get_default_prompts(filename)
    except Exception as e:
        logger.error(f"Error loading prompts from {filename}: {e}")
        return self.get_default_prompts(filename)

    # Callers use ``.get(...)`` on the result, so anything but a dict (for
    # example a JSON list) must be rejected here rather than crash later.
    if not isinstance(prompts, dict):
        logger.error(
            f"Error loading prompts from {filename}: expected a JSON object, "
            f"got {type(prompts).__name__}"
        )
        return self.get_default_prompts(filename)

    return prompts
|
39 |
+
|
40 |
+
def get_default_prompts(self, filename):
    """Return the built-in prompt configuration used when a file cannot be loaded.

    A filename containing "counter_speech" yields the counter-speech defaults;
    any other filename yields the moderation defaults.
    """
    if "counter_speech" not in filename:
        comprehensive = {
            "system_prompt": "You are an expert content moderation specialist analyzing text for safety and compliance.",
            "user_prompt_template": "Analyze this text for potential violations: \"{text}\"\n\nProvide: 1) Safety assessment 2) Violation categories 3) Severity level 4) Confidence score 5) Recommended action\n\nAnalysis:",
        }
        return {"moderation_prompts": {"comprehensive_analysis": comprehensive}}

    high_risk = {
        "system_prompt": "You are an expert educator specializing in counter-speech and conflict de-escalation.",
        "user_prompt_template": "Generate a respectful, educational counter-speech response to address harmful content while promoting understanding. Original text (Risk: {risk_level}, Confidence: {confidence}%, Sentiment: {sentiment}): \"{original_text}\"\n\nCounter-speech response:",
    }
    general_template = {
        "fallback_responses": [
            "Thank you for sharing your thoughts. Building strong communities works best when we focus on shared values and constructive dialogue. How might we work together on the concerns you've raised?",
            "I appreciate your perspective. Sometimes our strongest feelings can be expressed in ways that bring people together. What specific positive changes would you like to see?",
            "Your engagement with this topic is clear. When we channel that energy into inclusive dialogue, we often find solutions that work for everyone.",
        ]
    }
    return {
        "counter_speech_prompts": {
            "high_risk": high_risk,
            "general_template": general_template,
        }
    }
|
67 |
+
|
68 |
+
def initialize_agents(self):
    """Run every agent setup step in order, logging the start and the end."""
    logger.info("🤖 Initializing Prompt-Based Multi-Agent System...")

    # Order matches the pipeline: detection, counter-speech, moderation, sentiment.
    setup_steps = (
        self.setup_detection_agent,
        self.setup_counter_speech_agent,
        self.setup_moderation_agent,
        self.setup_sentiment_agent,
    )
    for run_setup in setup_steps:
        run_setup()

    logger.info("✅ All agents initialized successfully!")
|
78 |
+
|
79 |
+
def setup_detection_agent(self):
    """Initialize the hate speech detection agent.

    First tries the checkpoint stored in ``./model``. If that fails, falls
    back to the ``unitary/toxic-bert`` text-classification pipeline. If the
    fallback fails too, ``self.detection_agent`` is set to ``None`` instead
    of letting the exception abort start-up; ``detect_hate_speech`` calls
    the agent inside its own ``try`` block and reports the error per request.
    """
    try:
        logger.info("🔍 Loading Detection Agent (Fine-tuned DistilBERT)...")
        model_path = "./model"

        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            torch_dtype=torch.float32
        )

        self.detection_agent = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            return_all_scores=True,
            device=0 if torch.cuda.is_available() else -1
        )
        logger.info("✅ Detection Agent loaded successfully")

    except Exception as e:
        logger.error(f"❌ Detection Agent failed: {e}")
        logger.info("🔄 Using fallback detection model...")
        try:
            self.detection_agent = pipeline(
                "text-classification",
                model="unitary/toxic-bert",
                return_all_scores=True,
                # Same device selection as the primary pipeline above.
                device=0 if torch.cuda.is_available() else -1
            )
        except Exception as fallback_error:
            # Loading the fallback can fail as well (e.g. no network access).
            logger.error(f"❌ Fallback detection model failed: {fallback_error}")
            self.detection_agent = None
|
108 |
+
|
109 |
+
def setup_counter_speech_agent(self):
    """Create the counter-speech text2text pipeline; leave the agent as None on failure."""
    try:
        logger.info("💬 Loading Counter-Speech Agent with Custom Prompts...")

        # Using FLAN-T5 which is excellent at following instructions
        generation_defaults = {
            "max_length": 200,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
        }
        target_device = 0 if torch.cuda.is_available() else -1
        self.counter_speech_agent = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            device=target_device,
            **generation_defaults,
        )
        logger.info("✅ Counter-Speech Agent loaded (FLAN-T5 with custom prompts)")

    except Exception as e:
        logger.error(f"❌ Counter-Speech Agent failed: {e}")
        self.counter_speech_agent = None
|
129 |
+
|
130 |
+
def setup_moderation_agent(self):
    """Create the moderation text2text pipeline; leave the agent as None on failure."""
    try:
        logger.info("🛡️ Loading Moderation Agent with Custom Prompts...")

        # Using FLAN-T5 for structured moderation analysis
        target_device = 0 if torch.cuda.is_available() else -1
        moderation_pipeline = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            max_length=300,
            do_sample=False,
            device=target_device,
        )
        self.moderation_agent = moderation_pipeline
        logger.info("✅ Moderation Agent loaded (FLAN-T5 with analysis prompts)")

    except Exception as e:
        logger.error(f"❌ Moderation Agent failed: {e}")
        self.moderation_agent = None
|
148 |
|
149 |
+
def setup_sentiment_agent(self):
    """Create the sentiment-analysis pipeline; leave the agent as None on failure."""
    try:
        logger.info("📊 Loading Sentiment Agent...")

        target_device = 0 if torch.cuda.is_available() else -1
        sentiment_pipeline = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            return_all_scores=True,
            device=target_device,
        )
        self.sentiment_agent = sentiment_pipeline
        logger.info("✅ Sentiment Agent loaded (Twitter-RoBERTa)")

    except Exception as e:
        logger.error(f"❌ Sentiment Agent failed: {e}")
        self.sentiment_agent = None
|
165 |
|
166 |
def detect_hate_speech(self, text):
|
167 |
+
"""Detection Agent: Analyze text for hate speech"""
|
168 |
if not text or not text.strip():
|
169 |
return {
|
170 |
"status": "❌ Please enter some text to analyze.",
|
171 |
"prediction": "No input",
|
172 |
"confidence": 0.0,
|
173 |
"all_scores": {},
|
174 |
+
"risk_level": "Unknown",
|
175 |
+
"is_hate_speech": False
|
176 |
}
|
177 |
|
178 |
try:
|
179 |
+
results = self.detection_agent(text.strip())
|
|
|
180 |
|
|
|
|
|
|
|
|
|
181 |
if isinstance(results, list) and len(results) > 0:
|
182 |
if isinstance(results[0], list):
|
183 |
results = results[0]
|
184 |
|
|
|
185 |
all_scores = {}
|
186 |
max_score = 0
|
187 |
predicted_label = "UNKNOWN"
|
|
|
201 |
|
202 |
# Determine hate speech status
|
203 |
hate_keywords = ["HATE", "TOXIC", "NEGATIVE", "HARMFUL", "1", "LABEL_1"]
|
|
|
204 |
|
205 |
is_hate_speech = False
|
206 |
risk_level = "Low"
|
|
|
231 |
}
|
232 |
|
233 |
except Exception as e:
|
234 |
+
logger.error(f"Detection error: {e}")
|
235 |
return {
|
236 |
+
"status": f"❌ Detection error: {str(e)}",
|
237 |
"prediction": "Error",
|
238 |
"confidence": 0.0,
|
239 |
"all_scores": {},
|
240 |
+
"risk_level": "Unknown",
|
241 |
+
"is_hate_speech": False
|
242 |
}
|
243 |
|
244 |
+
def analyze_sentiment(self, text):
    """Sentiment Agent: Analyze emotional tone.

    Args:
        text: Text to classify. ``None``, empty and whitespace-only input
            are treated as "nothing to analyze".

    Returns:
        A dict with ``sentiment`` (lower-cased label of the highest score)
        and ``confidence`` (that score). On success it also carries
        ``all_sentiments`` mapping each label to its score. When no agent is
        loaded, the input is empty, or the agent raises, the neutral result
        ``{"sentiment": "neutral", "confidence": 0.0}`` is returned.
    """
    # Guard against None as well as blank text (same check as detect_hate_speech).
    if not self.sentiment_agent or not text or not text.strip():
        return {"sentiment": "neutral", "confidence": 0.0}

    try:
        results = self.sentiment_agent(text.strip())
        # The agent may wrap the per-label scores in an outer list; unwrap it.
        if isinstance(results, list) and len(results) > 0:
            if isinstance(results[0], list):
                results = results[0]

        best_sentiment = max(results, key=lambda x: x['score'])
        return {
            "sentiment": best_sentiment['label'].lower(),
            "confidence": best_sentiment['score'],
            "all_sentiments": {r['label']: r['score'] for r in results}
        }
    except Exception as e:
        logger.error(f"Sentiment analysis error: {e}")
        return {"sentiment": "neutral", "confidence": 0.0}
|
264 |
+
|
265 |
+
def moderate_content_with_prompts(self, text, detection_result, sentiment_result):
    """Moderation Agent: Structured analysis using prompts.

    Args:
        text: Text to analyze. ``None`` and blank input are rejected.
        detection_result: Dict from the detection step; its ``risk_level``
            and ``confidence`` entries feed the fallback assessment.
        sentiment_result: Accepted for interface symmetry; not read here.

    Returns:
        A dict that always has ``analysis`` and ``confidence``. When the
        moderation agent produced text it also has ``safety_level`` and
        ``prompt_used``. When the agent is missing or the input is blank,
        only ``analysis``/``confidence`` are present. When generation fails
        or returns nothing, a fallback built from ``detection_result`` with
        ``safety_level`` is returned.
    """
    if not self.moderation_agent or not text or not text.strip():
        return {"analysis": "Unable to perform moderation analysis", "confidence": 0.0}

    try:
        # Get the appropriate moderation prompt
        moderation_config = self.moderation_prompts.get("moderation_prompts", {})
        analysis_config = moderation_config.get("comprehensive_analysis", {})

        # Construct the analysis prompt
        system_prompt = analysis_config.get("system_prompt", "Analyze this text for safety concerns.")
        user_prompt_template = analysis_config.get("user_prompt_template", "Analyze: {text}")

        # Fill in the template
        full_prompt = f"{system_prompt}\n\n{user_prompt_template.format(text=text)}"

        # Generate analysis
        result = self.moderation_agent(full_prompt, max_length=250, do_sample=False)

        if result and len(result) > 0:
            analysis_text = result[0]['generated_text']

            # Parse the analysis for key information
            confidence = self.extract_confidence_from_analysis(analysis_text)
            safety_level = self.extract_safety_level_from_analysis(analysis_text)

            return {
                "analysis": analysis_text,
                "confidence": confidence,
                "safety_level": safety_level,
                "prompt_used": "comprehensive_analysis"
            }

    except Exception as e:
        logger.error(f"Moderation analysis error: {e}")

    # Fallback analysis: reached after an error AND when the agent returned
    # an empty result, so the caller always receives a dict.
    detection_confidence = detection_result.get('confidence', 0.0)
    return {
        "analysis": f"Basic assessment: Risk level {detection_result.get('risk_level', 'unknown')}, requires review if confidence > 70%",
        "confidence": detection_confidence,
        "safety_level": "review_needed" if detection_confidence > 0.7 else "acceptable"
    }
|
308 |
+
|
309 |
+
def generate_counter_speech_with_prompts(self, text, detection_result, sentiment_result):
    """Counter-Speech Agent: Generate response using custom prompts.

    Args:
        text: The original text being answered.
        detection_result: Dict from the detection step; ``is_hate_speech``,
            ``risk_level`` and ``confidence`` are read.
        sentiment_result: Dict from the sentiment step; ``sentiment`` is read.

    Returns:
        A display string: a positive note when the text was not flagged, an
        "AI-Generated Counter-Speech" message when the agent produced text,
        otherwise a "Template Response" chosen from the fallback list.
    """
    import random
    import re

    if not detection_result.get("is_hate_speech", False):
        return "✨ This text promotes positive communication. Great job maintaining respectful dialogue!"

    risk_level = detection_result.get("risk_level", "Low").lower()
    # Rounded so the prompt reads "92.3%" rather than "92.30000000000001%".
    confidence = round(detection_result.get("confidence", 0.0) * 100, 1)
    sentiment = sentiment_result.get("sentiment", "neutral")

    # Get appropriate prompts based on risk level
    counter_speech_config = self.counter_speech_prompts.get("counter_speech_prompts", {})

    # Select prompt based on risk level; anything but high/medium is low risk.
    risk_key = {"high": "high_risk", "medium": "medium_risk"}.get(risk_level, "low_risk")
    prompt_config = counter_speech_config.get(risk_key, {})

    # If no specific config, use general template
    if not prompt_config:
        prompt_config = counter_speech_config.get("general_template", {})

    if self.counter_speech_agent and prompt_config:
        try:
            # Construct the prompt
            system_prompt = prompt_config.get("system_prompt", "Generate a respectful counter-speech response.")
            user_prompt_template = prompt_config.get("user_prompt_template",
                                                     "Generate a counter-speech response for: {original_text}")

            # Fill in the template
            full_prompt = f"{system_prompt}\n\n{user_prompt_template.format(original_text=text, risk_level=risk_level, confidence=confidence, sentiment=sentiment)}"

            # Generate counter-speech
            result = self.counter_speech_agent(full_prompt, max_length=150, do_sample=True, temperature=0.7)

            if result and len(result) > 0:
                generated_text = result[0]['generated_text']

                # Clean up the response: drop everything up to an echoed marker.
                if "Counter-speech response:" in generated_text:
                    generated_text = generated_text.split("Counter-speech response:")[-1].strip()
                else:
                    # Case-insensitive match on the marker only; the reply
                    # itself keeps its original capitalisation.
                    parts = re.split(r"response:", generated_text, flags=re.IGNORECASE)
                    if len(parts) > 1:
                        generated_text = parts[-1].strip()

                return f"🤖 **AI-Generated Counter-Speech** ({risk_level.title()} Risk): {generated_text}"

        except Exception as e:
            logger.error(f"Counter-speech generation error: {e}")

    # Fallback to template responses. ``or`` also covers a configured but
    # empty list, which would make random.choice raise IndexError.
    fallback_responses = counter_speech_config.get("general_template", {}).get("fallback_responses") or [
        "Thank you for sharing your thoughts. Building strong communities works best when we focus on shared values and constructive dialogue."
    ]

    return f"📝 **Template Response** ({risk_level.title()} Risk): {random.choice(fallback_responses)}"
|
369 |
|
370 |
+
def extract_confidence_from_analysis(self, analysis_text):
    """Extract confidence score from moderation analysis.

    Looks first for a percentage ("85%", "0.5 %"), then for a number next to
    the word "confidence" ("confidence: 0.85", "90 confidence").

    Args:
        analysis_text: Free-form text produced by the moderation agent.

    Returns:
        A float in ``[0.0, 1.0]``; ``0.5`` when no number can be found.
    """
    import re

    lowered = analysis_text.lower()

    # A value followed by "%" is always a percentage, so always divide by 100
    # (the old ``value > 1`` heuristic turned "1%" into 1.0).
    percent_match = re.search(r'(\d+(?:\.\d+)?)\s*%', lowered)
    if percent_match:
        return min(float(percent_match.group(1)) / 100, 1.0)

    # Without a "%" sign the scale is ambiguous: values above 1 are taken as
    # percentages, values up to 1 as fractions.
    for pattern in (r'confidence[:\s]+(\d*\.?\d+)', r'(\d*\.?\d+)\s*confidence'):
        match = re.search(pattern, lowered)
        if match:
            value = float(match.group(1))
            if value > 1:
                value /= 100
            return min(value, 1.0)

    return 0.5  # Default moderate confidence
|
387 |
+
|
388 |
+
def extract_safety_level_from_analysis(self, analysis_text):
    """Extract safety assessment from moderation analysis.

    Keywords are matched at the start of a word, so "violations" and
    "reviewed" still match while "unsafe" no longer counts as "safe" and
    "unacceptable" no longer counts as "acceptable". Negation ("not harmful")
    is not interpreted.

    Args:
        analysis_text: Free-form text produced by the moderation agent.

    Returns:
        ``"harmful"``, ``"concerning"``, ``"safe"`` or, when no keyword
        matches, ``"review_needed"``. Levels are checked in that order.
    """
    import re

    analysis_lower = analysis_text.lower()

    levels = (
        ("harmful", ('unsafe', 'harmful', 'high risk', 'remove', 'violation')),
        ("concerning", ('concerning', 'medium risk', 'review', 'warning')),
        ("safe", ('safe', 'low risk', 'acceptable', 'approve')),
    )
    for level, keywords in levels:
        # Leading \b only: allow suffixes, forbid prefixes such as "un".
        if any(re.search(r'\b' + re.escape(word), analysis_lower) for word in keywords):
            return level

    return "review_needed"
|
400 |
+
|
401 |
+
def comprehensive_analysis(self, text):
    """Run detection, sentiment, moderation and counter-speech on *text*.

    Returns a dict with each agent's output plus ``processing_time`` (seconds
    spent in the four calls) and ``timestamp`` (ISO string taken afterwards).
    """
    started_at = datetime.now()

    # Core agents first; their results are inputs to the prompt-based agents.
    detection = self.detect_hate_speech(text)
    sentiment = self.analyze_sentiment(text)

    moderation = self.moderate_content_with_prompts(text, detection, sentiment)
    reply = self.generate_counter_speech_with_prompts(text, detection, sentiment)

    elapsed = datetime.now() - started_at

    report = {
        "detection": detection,
        "sentiment": sentiment,
        "moderation": moderation,
        "counter_speech": reply,
        "processing_time": elapsed.total_seconds(),
    }
    report["timestamp"] = datetime.now().isoformat()
    return report
|
423 |
|
424 |
+
# Build the module-level agent system once at import time; constructing it
# loads the prompt files and sets up every agent.
logger.info("🚀 Starting Prompt-Based Multi-Agent System...")
agent_system = PromptBasedMultiAgentSystem()
|
427 |
|
428 |
+
def analyze_text_with_prompts(text):
    """Main analysis function using prompt-based agents.

    Args:
        text: User-supplied text; ``None`` and blank input short-circuit.

    Returns:
        A 5-tuple ``(detection_status, detection_scores, counter_speech,
        agent_summary, all_agent_data)``. For blank input, a 5-tuple of
        placeholder values is returned in its place.
    """
    if not text or not text.strip():
        return (
            "❌ Please enter some text to analyze.",
            {},
            "No analysis performed.",
            "No input provided",
            {}
        )

    # Run comprehensive analysis with prompts
    results = agent_system.comprehensive_analysis(text)

    # Extract results for display
    detection_status = results["detection"]["status"]
    detection_scores = results["detection"]["all_scores"]
    counter_speech = results["counter_speech"]

    # Preview of the moderation text: add an ellipsis only when something was
    # actually cut off, not after every short analysis.
    analysis_text = results['moderation'].get('analysis', 'No detailed analysis available')
    analysis_preview = analysis_text[:200]
    if len(analysis_text) > 200:
        analysis_preview += "..."

    # Create detailed agent summary
    agent_summary = f"""
🔍 **Detection Agent**: {results['detection']['risk_level']} risk ({results['detection']['confidence']:.2%} confidence)
📊 **Sentiment Agent**: {results['sentiment']['sentiment'].title()} ({results['sentiment']['confidence']:.2%} confidence)
🛡️ **Moderation Agent**: {results['moderation'].get('safety_level', 'unknown').title()} safety level ({results['moderation'].get('confidence', 0):.2%} confidence)
💬 **Counter-Speech Agent**: {"Custom prompt-based" if "AI-Generated" in counter_speech else "Template-based"} response
⏱️ **Processing Time**: {results['processing_time']:.3f} seconds

📋 **Moderation Analysis**: {analysis_preview}
"""

    # Compile comprehensive agent data
    all_agent_data = {
        "Detection_Analysis": {
            "scores": detection_scores,
            "risk_level": results['detection']['risk_level'],
            "is_hate_speech": results['detection']['is_hate_speech']
        },
        "Sentiment_Analysis": {
            "primary_sentiment": results['sentiment']['sentiment'],
            "all_sentiments": results["sentiment"].get("all_sentiments", {})
        },
        "Moderation_Analysis": {
            "safety_assessment": results['moderation'].get('safety_level', 'unknown'),
            "detailed_analysis": results['moderation'].get('analysis', ''),
            "confidence": results['moderation'].get('confidence', 0),
            "prompt_used": results['moderation'].get('prompt_used', 'fallback')
        },
        "Counter_Speech": {
            "response": counter_speech,
            "generation_method": "AI-Generated" if "AI-Generated" in counter_speech else "Template-based"
        },
        "System_Info": {
            "timestamp": results["timestamp"],
            "processing_time_seconds": results["processing_time"],
            "prompt_files_loaded": {
                "counter_speech": bool(agent_system.counter_speech_prompts),
                "moderation": bool(agent_system.moderation_prompts)
            }
        }
    }

    return detection_status, detection_scores, counter_speech, agent_summary, all_agent_data
|
490 |
|
491 |
+
def reload_prompts():
    """Re-read both prompt files into ``agent_system`` and report the outcome."""
    try:
        counter_speech = agent_system.load_prompts("counter_speech_prompts.json")
        agent_system.counter_speech_prompts = counter_speech
        moderation = agent_system.load_prompts("moderation_prompts.json")
        agent_system.moderation_prompts = moderation
    except Exception as e:
        return f"❌ Error reloading prompts: {e}"
    else:
        return "✅ Prompts reloaded successfully!"
|
499 |
+
|
500 |
+
def get_prompt_info():
    """Summarize the prompt sets currently held by ``agent_system``.

    Counts the entries stored under the top-level ``"counter_speech_prompts"``
    and ``"moderation_prompts"`` keys of the two loaded prompt dictionaries
    (a missing key counts as zero).

    Returns:
        dict: The two counts plus a per-file status label, which reads
        "Loaded" when the corresponding count is positive and "Not found"
        otherwise.
    """
    def _status_label(count):
        # A count of zero is reported as the file not having been found.
        return "✅ Loaded" if count > 0 else "❌ Not found"

    n_counter_speech = len(
        agent_system.counter_speech_prompts.get("counter_speech_prompts", {})
    )
    n_moderation = len(
        agent_system.moderation_prompts.get("moderation_prompts", {})
    )

    file_status = {
        "counter_speech_prompts.json": _status_label(n_counter_speech),
        "moderation_prompts.json": _status_label(n_moderation),
    }

    return {
        "counter_speech_prompt_categories": n_counter_speech,
        "moderation_prompt_categories": n_moderation,
        "prompt_files_status": file_status,
    }
|
513 |
|
514 |
# Create the Gradio interface
|
515 |
# Builds the three-tab Gradio UI (analysis, prompt management, architecture
# notes) and wires the button event handlers. Event listeners must be
# registered inside the Blocks context, so they live at the end of this block.
with gr.Blocks(
    title="Prompt-Based Multi-Agent Hate Speech Detection System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1400px !important;
    }
    .prompt-info {
        background: linear-gradient(90deg, #f0f9ff 0%, #e0f2fe 100%);
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #0284c7;
    }
    .agent-summary {
        background: linear-gradient(90deg, #fefce8 0%, #fef3c7 100%);
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #f59e0b;
    }
    """
) as demo:

    gr.Markdown("""
    # 🤖 Prompt-Based Multi-Agent Hate Speech Detection System

    **Advanced AI Agent Collaboration with Custom Prompts**

    🔍 **Detection Agent** - Your fine-tuned DistilBERT model
    💬 **Counter-Speech Agent** - FLAN-T5 with custom prompt engineering
    🛡️ **Moderation Agent** - Structured analysis using specialized prompts
    📊 **Sentiment Agent** - Twitter-RoBERTa for emotional context

    *Each agent uses carefully crafted prompts from external JSON files for optimal performance.*
    """)

    with gr.Tab("🤖 Prompt-Based Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Enter text for comprehensive prompt-based analysis",
                    placeholder="Enter text here to see how prompt-engineered AI agents collaborate...",
                    lines=5,
                    max_lines=15
                )

                with gr.Row():
                    analyze_btn = gr.Button("🚀 Run Prompt-Based Analysis", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
                    reload_btn = gr.Button("🔄 Reload Prompts", variant="secondary")

                gr.Examples(
                    examples=[
                        ["This is a wonderful day to collaborate and learn from each other!"],
                        ["I appreciate everyone's different perspectives and backgrounds."],
                        ["Let's work together to build a more inclusive community."],
                        ["Thank you for sharing your experience. I'd love to understand your viewpoint better."],
                        ["The diversity in our group makes our discussions much richer and more meaningful."],
                        ["I respectfully disagree, but I value your right to express your opinion."]
                    ],
                    inputs=text_input,
                    label="📝 Try these examples with prompt-based agents:"
                )

        with gr.Row():
            with gr.Column():
                detection_output = gr.Textbox(
                    label="🎯 Primary Detection Result",
                    interactive=False,
                    lines=3
                )

                agent_summary = gr.Textbox(
                    label="🤖 Prompt-Based Agent Summary",
                    interactive=False,
                    lines=8,
                    elem_classes=["agent-summary"]
                )

            with gr.Column():
                counter_speech_output = gr.Textbox(
                    label="💬 Prompt-Generated Counter-Speech",
                    interactive=False,
                    lines=6
                )

                reload_status = gr.Textbox(
                    label="🔄 Prompt Reload Status",
                    interactive=False,
                    lines=2
                )

        with gr.Row():
            all_agents_output = gr.JSON(
                label="📊 Complete Prompt-Based Multi-Agent Analysis",
                visible=True
            )

    with gr.Tab("📝 Prompt Management"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
                ## 📝 Counter-Speech Prompts

                The system uses specialized prompts for different risk levels:

                ### 🚨 High Risk Prompts
                - **Purpose**: Address clear hate speech with educational responses
                - **Tone**: Firm but respectful, educational focus
                - **Length**: 50-100 words
                - **Goal**: De-escalation and education

                ### ⚠️ Medium Risk Prompts
                - **Purpose**: Handle potentially problematic content
                - **Tone**: Gentle guidance, supportive
                - **Length**: 40-80 words
                - **Goal**: Reflection and improvement

                ### ⚡ Low Risk Prompts
                - **Purpose**: Encourage even better communication
                - **Tone**: Positive reinforcement
                - **Length**: 30-60 words
                - **Goal**: Enhancement and encouragement
                """)

            with gr.Column():
                gr.Markdown("""
                ## 🛡️ Moderation Prompts

                Structured analysis prompts for comprehensive assessment:

                ### 🔍 Comprehensive Analysis
                - **Safety Assessment**: SAFE/CONCERNING/HARMFUL
                - **Violation Categories**: Specific policy areas
                - **Severity Levels**: LOW/MEDIUM/HIGH
                - **Confidence Scoring**: 0-100% certainty
                - **Contextual Factors**: Cultural and situational

                ### 📊 Specialized Analysis Types
                - **Hate Speech Focus**: Protected group targeting
                - **Toxicity Assessment**: Discourse quality impact
                - **Context Analysis**: Cultural and situational factors
                - **Action Recommendations**: Specific moderation steps
                """)

        with gr.Row():
            # Initial snapshot only; the reload handler below refreshes it.
            prompt_info_output = gr.JSON(
                label="📋 Current Prompt Configuration",
                value=get_prompt_info()
            )

        gr.Markdown("""
        ## 📁 Prompt File Structure

        To customize the system behavior, create these JSON files:

        ### `counter_speech_prompts.json`
        ```json
        {
          "counter_speech_prompts": {
            "high_risk": {
              "system_prompt": "You are an expert educator...",
              "user_prompt_template": "Generate response for: {original_text}..."
            }
          }
        }
        ```

        ### `moderation_prompts.json`
        ```json
        {
          "moderation_prompts": {
            "comprehensive_analysis": {
              "system_prompt": "You are a content moderation expert...",
              "user_prompt_template": "Analyze: {text}..."
            }
          }
        }
        ```

        **Benefits of External Prompts:**
        - 🎯 **Fine-tuned control** over agent behavior
        - 🔄 **Easy iteration** without code changes
        - 📊 **A/B testing** of different prompt strategies
        - 🎨 **Domain-specific customization** for different platforms
        - 📈 **Performance optimization** through prompt engineering
        """)

    with gr.Tab("🔧 System Architecture"):
        gr.Markdown("""
        ## 🏗️ Prompt-Based Agent Architecture

        ### 🔄 Agent Collaboration Flow
        ```
        Input Text
        ├── Detection Agent → Risk Classification (DistilBERT)
        ├── Sentiment Agent → Emotional Context (RoBERTa)
        ├── Moderation Agent → Structured Analysis (FLAN-T5 + Prompts)
        └── Counter-Speech Agent → Educational Response (FLAN-T5 + Prompts)
        ↑
        Uses custom prompts and outputs from all other agents
        ```

        ### 📝 Prompt Engineering Advantages

        #### 🎯 **Precision Control**
        - **Task-specific instructions** for each scenario
        - **Tone and style guidelines** for appropriate responses
        - **Length and format specifications** for consistency
        - **Context integration** from multiple agent outputs

        #### 🔄 **Iterative Improvement**
        - **Hot-swappable prompts** without system restart
        - **A/B testing capabilities** for prompt effectiveness
        - **Performance metrics** tracking for optimization
        - **Domain adaptation** for different use cases

        #### 🛡️ **Quality Assurance**
        - **Bias mitigation** through careful prompt design
        - **Safety guardrails** built into prompt structure
        - **Consistency enforcement** across all responses
        - **Cultural sensitivity** considerations

        ### 🚀 Production Benefits

        - **🎨 Customizable**: Adapt to different platforms and communities
        - **📈 Scalable**: Easy to add new prompt categories
        - **🔧 Maintainable**: Update behavior without code deployment
        - **📊 Measurable**: Track prompt performance and effectiveness
        - **🌍 Localizable**: Different prompts for different regions/cultures

        ### ⚠️ Deployment Considerations

        #### 🔒 Security
        - **Prompt injection protection** for user inputs
        - **Content filtering** on generated responses
        - **Rate limiting** to prevent abuse
        - **Audit logging** for compliance

        #### 📊 Monitoring
        - **Response quality metrics** tracking
        - **User feedback integration** for continuous improvement
        - **Error rate monitoring** across different prompt types
        - **Performance benchmarking** against baseline models

        #### 👥 Human Oversight
        - **Expert review processes** for prompt updates
        - **Community feedback loops** for prompt effectiveness
        - **Escalation pathways** for edge cases
        - **Regular bias audits** and prompt refinement
        """)

    # Event handlers
    def _run_analysis(text):
        """Map the five-value analysis result onto the four output widgets.

        The raw detection scores are dropped here because they are already
        part of the complete agent payload shown in the JSON widget; listing
        that widget twice in ``outputs`` only wrote it twice per click.
        """
        status, _scores, counter_speech, summary, agent_data = analyze_text_with_prompts(text)
        return status, counter_speech, summary, agent_data

    def _reload_and_refresh():
        """Reload prompts, then re-read the summary so the tab is not stale."""
        status = reload_prompts()
        return status, get_prompt_info()

    analyze_btn.click(
        fn=_run_analysis,
        inputs=text_input,
        outputs=[detection_output, counter_speech_output, agent_summary, all_agents_output]
    )

    clear_btn.click(
        fn=lambda: ("", "", "", "", {}),
        outputs=[text_input, detection_output, counter_speech_output, agent_summary, all_agents_output]
    )

    reload_btn.click(
        fn=_reload_and_refresh,
        outputs=[reload_status, prompt_info_output]
    )
|
782 |
|
783 |
# Launch configuration
|