LPX55 committed on
Commit
840b896
·
verified ·
1 Parent(s): 7c46ab5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +377 -0
app.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import List, Tuple
3
+ import gradio as gr
4
+
5
class ContentSafetyChecker:
    """Layered keyword/regex screen for text prompts.

    Each ``check_*`` method is an independent heuristic that returns ``True``
    when the prompt pairs a minor-age cue (age word or a number in the
    3-17 range) with sexual vocabulary, or matches a listed phrase/pattern.
    ``is_content_suspicious`` runs the checks in a fixed order and reports
    the first one that fires.

    The checker holds no per-prompt state; the instance attributes are
    built once in ``__init__`` and only read afterwards.
    """

    # Inclusive numeric range treated as a minor's age by the numeric checks.
    _MIN_AGE = 3
    _MAX_AGE = 17

    # Each check keeps its own vocabulary; the lists differ slightly on
    # purpose-preserving grounds, so they are not merged.
    _CONTEXT_SEXUAL_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit',
        'undress', 'penis', 'vagina', 'pussy', 'cock',
    )
    _COMBINATION_SEXUAL_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'explicit', 'sexual',
        'penis', 'vagina', 'pussy', 'cock',
    )
    _SIMPLE_SEXUAL_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit',
        'penis', 'vagina', 'pussy', 'cock',
    )
    # NOTE(review): 'off' and 'insert' are matched as substrings, so they
    # also hit words such as "coffee" -- confirm this breadth is intended.
    _INDICATOR_SEXUAL_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'nudify', 'off', 'penis',
        'cock', 'pussy', 'vagina', 'insert',
    )

    # "under 18", "below 16", "less than 12", "12 and under".
    _AGE_LIMIT_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'\bunder\s*(1[0-8]|[0-9])\b',
            r'\bbelow\s*(1[0-8]|[0-9])\b',
            r'\bless than\s*(1[0-8]|[0-9])\b',
            r'\b[0-9]{1,2}\s*and under\b',
        )
    )

    # Age word next to a 1-2 digit number, in either order.
    _AGE_WORD_NUMBER_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'(child|kid|baby|toddler|infant|minor|youngster|teen|youth)\s*(?:of|is|was)?\s*(\d{1,2})\s*(years?\s*old)?',
            r'(\d{1,2})\s*(years?\s*old|yo|y/o)?\s*(child|kid|baby|toddler|infant|minor|young)',
        )
    )

    # NOTE(review): these are unanchored and very broad (the last one
    # matches "and"); they only fire together with an age indicator, but
    # ordinary sentences can still trip them. Left as-is deliberately.
    _EVASION_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'p\w*is',
            r'p\w*sy',
            r's\w*x',
            r'n\w*d\w*',
        )
    )
    _AGE_INDICATORS = ('old', 'teen', 'baby', 'kid', 'child', 'minor', 'young')

    # (context regex, words that make the context suspicious).
    # DOTALL so a line break between the two halves does not hide a match.
    _RED_FLAG_CONTEXTS = (
        # Suspicious session types
        (re.compile(r'(private|secret|hidden|exclusive).*photo', re.DOTALL),
         ('nude', 'naked', 'photos')),
        # Suspicious content descriptions
        (re.compile(r'(special|unique|exclusive|view|angle|pov|shot).*content', re.DOTALL),
         ('young', 'minor', 'teen')),
    )

    _SUSPICIOUS_PHRASES = (
        'young model', 'teen photoshoot', 'minor content',
        'child art', 'kids only', 'no adults allowed',
        'private session', 'secret photos', 'hidden content',
    )

    # Strips punctuation glued to either end of a whitespace-delimited token.
    _EDGE_PUNCTUATION = re.compile(r'^\W+|\W+$')

    def __init__(self):
        # Pre-compile regex patterns for performance
        self.csam_patterns = self._compile_csam_patterns()
        self.suspicious_combinations = self._compile_suspicious_combinations()

    def _compile_csam_patterns(self) -> List[re.Pattern]:
        """Return the compiled first-layer detection patterns.

        Patterns are case-insensitive and compiled with ``re.DOTALL`` so the
        ``.*`` gaps also span line breaks; the prompt box is multi-line and
        without DOTALL a newline between two cues defeats every ``.*``
        pattern.
        """
        patterns = [
            # Direct references
            r'\b(csam|child.*sexual|minor.*abuse|illegal.*content)',
            # Age + sexual content combinations
            r'\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen).*\b(sex|nude|naked|pussy|penis|vagina)',
            r'\b(sex|nude|naked|porn|erotic|seductive|lust|breast|genital|penis|vagina).*\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen)',
            # Numerical age checks with sexual content
            r'\b(?:age|aged?|years? old)\s*[=\-:]\s*(?:[0-9]|1[0-7])\b.*\b(sex|nude|naked|porn|erotic)',
            r'\b([0-9]|1[0-7])\s*(?:years? old|yo|y/o)\b.*\b(sex|nude|naked|porn|erotic)',
            # Numbers 3-17 directly followed by sexual content terms
            r'\b([3-9]|1[0-7])\s*[,\-]?\s*(sex|nude|naked|porn|erotic|seductive|explicit)',
            # Descriptive numerical age checks
            r'\b(under\s*(1[0-8]|[0-9])|below\s*(1[0-8]|[0-9])|less than\s*(1[0-8]|[0-9]))\s*years?\s*[,\-]?\s*(sex|nude|naked|porn|erotic)',
            # Explicit exploitation vocabulary
            r'\b(pedophil|lolita|underage.*sex|minor.*sex|illegal.*sex)',
            # Grooming language
            r'\b(inappropriate.*adult.*child|adult.*minor.*relationship|groom)',
        ]
        return [re.compile(pattern, re.IGNORECASE | re.DOTALL) for pattern in patterns]

    def _compile_suspicious_combinations(self) -> List[Tuple[List[str], List[str], int]]:
        """Return ``(age_terms, sexual_terms, max_word_distance)`` triples.

        Used by ``check_word_proximity``. The age list starts at 3/"three"
        so it agrees with the 3-17 range the numeric checks use.
        """
        return [
            # Age indicators + sexual content
            (['3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17',
              'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve',
              'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen',
              'young teen', 'minor', 'child', 'kid', 'baby'],
             ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'penis', 'vagina',
              'penetration', 'cock', 'pussy'], 8),
            (['adult'],
             ['pedo', 'loli', 'secret', 'rape'], 5),
        ]

    @staticmethod
    def _parse_int(digits: str):
        """Return ``int(digits)`` or ``None`` when the conversion is refused.

        ``int`` raises ``ValueError`` for digit runs longer than the
        interpreter's integer-string limit (Python 3.11+), so the guard is
        reachable even though the input comes from a ``\\d+`` match.
        """
        try:
            return int(digits)
        except ValueError:
            return None

    @classmethod
    def _has_minor_age_number(cls, text: str):
        """Return the first standalone number in 3-17 found in *text*, else ``None``."""
        for digits in re.findall(r'\b\d+\b', text):
            age = cls._parse_int(digits)
            if age is not None and cls._MIN_AGE <= age <= cls._MAX_AGE:
                return age
        return None

    @staticmethod
    def _term_positions(tokens: List[str], terms: List[str]) -> List[int]:
        """Return every token index at which any of *terms* starts.

        A term may consist of several words ("young teen"); it matches when
        its words appear as consecutive tokens.
        """
        positions = []
        for term in terms:
            parts = term.split()
            width = len(parts)
            positions.extend(
                index
                for index in range(len(tokens) - width + 1)
                if tokens[index:index + width] == parts
            )
        return positions

    def check_pattern_matching(self, prompt: str) -> bool:
        """First line of defense: return True if any compiled pattern matches."""
        for pattern in self.csam_patterns:
            if pattern.search(prompt):
                print(f"🚨 Pattern matched: {pattern.pattern}")
                return True
        return False

    def check_numerical_age_context(self, prompt: str) -> bool:
        """Check for numerical ages in suspicious contexts.

        Fires when the prompt contains a sexual term and either a standalone
        number in 3-17, or an age-limit phrase ("under 18", "12 and under")
        that is followed somewhere later by a sexual term.
        """
        prompt_lower = prompt.lower()

        # Both branches below need a sexual term somewhere in the prompt.
        if not any(term in prompt_lower for term in self._CONTEXT_SEXUAL_TERMS):
            return False

        age = self._has_minor_age_number(prompt_lower)
        if age is not None:
            print(f"🚨 Suspicious age with sexual content: {age}")
            return True

        # Phrases like "under 18", "below 16": only text after the phrase counts.
        for phrase_pattern in self._AGE_LIMIT_PATTERNS:
            match = phrase_pattern.search(prompt_lower)
            if match is None:
                continue
            remaining_text = prompt_lower[match.end():]
            if any(term in remaining_text for term in self._CONTEXT_SEXUAL_TERMS):
                print(f"🚨 Age limit phrase with sexual content: {match.group()}")
                return True

        return False

    def check_age_sexual_combinations(self, prompt: str) -> bool:
        """Check for "age word + number" phrases with a sexual term nearby.

        "Nearby" means within 50 characters before or after the matched
        phrase.
        """
        prompt_lower = prompt.lower()

        for age_pattern in self._AGE_WORD_NUMBER_PATTERNS:
            for match in age_pattern.finditer(prompt_lower):
                # The number is whichever captured group is all digits.
                age = next(
                    (int(group) for group in match.groups() if group and group.isdigit()),
                    None,
                )
                if age is None or not self._MIN_AGE <= age <= self._MAX_AGE:
                    continue

                start_pos = max(0, match.start() - 50)
                end_pos = min(len(prompt_lower), match.end() + 50)
                context = prompt_lower[start_pos:end_pos]

                if any(term in context for term in self._COMBINATION_SEXUAL_TERMS):
                    print(f"🚨 Age-sexual content combination detected: age {age}")
                    return True

        return False

    def check_word_proximity(self, prompt: str) -> bool:
        """Second line: flag an age term within N words of a paired term.

        Tokens are the whitespace-separated words of the lowercased prompt
        with leading/trailing punctuation removed, so "adult," still counts
        as "adult". Every occurrence of each term is considered, not just
        the first, and multi-word terms are matched as consecutive tokens.
        """
        # Keep one token per whitespace-delimited word so distances are
        # measured in words exactly as typed.
        tokens = [self._EDGE_PUNCTUATION.sub('', word) for word in prompt.lower().split()]

        for age_words, sexual_words, distance_threshold in self.suspicious_combinations:
            age_positions = self._term_positions(tokens, age_words)
            if not age_positions:
                continue
            sexual_positions = self._term_positions(tokens, sexual_words)
            if any(
                abs(age_idx - sexual_idx) <= distance_threshold
                for age_idx in age_positions
                for sexual_idx in sexual_positions
            ):
                return True

        return False

    def simple_age_check(self, prompt: str) -> bool:
        """Return True for a standalone number in 3-17 plus any sexual term."""
        if not any(term in prompt.lower() for term in self._SIMPLE_SEXUAL_TERMS):
            return False
        return self._has_minor_age_number(prompt) is not None

    def check_suspicious_phrases(self, prompt: str) -> bool:
        """Third line: return True if the prompt contains a listed phrase."""
        prompt_lower = prompt.lower()
        return any(phrase in prompt_lower for phrase in self._SUSPICIOUS_PHRASES)

    def check_evasion_patterns(self, prompt: str) -> bool:
        """Fourth line: loose obfuscation patterns combined with an age cue.

        Returns True only when the prompt contains one of the age indicator
        substrings AND at least one of the loose letter-skeleton patterns.
        """
        prompt_lower = prompt.lower()
        if not any(indicator in prompt_lower for indicator in self._AGE_INDICATORS):
            return False
        return any(pattern.search(prompt) for pattern in self._EVASION_PATTERNS)

    def check_numerical_age_indicators(self, prompt: str) -> bool:
        """Fifth line: number in 3-17 plus any term from the indicator list."""
        prompt_lower = prompt.lower()
        if not any(indicator in prompt_lower for indicator in self._INDICATOR_SEXUAL_TERMS):
            return False
        return self._has_minor_age_number(prompt) is not None

    def check_contextual_red_flags(self, prompt: str) -> bool:
        """Sixth line: context pattern plus one of its companion words."""
        prompt_lower = prompt.lower()
        for context_pattern, suspicious_words in self._RED_FLAG_CONTEXTS:
            if context_pattern.search(prompt_lower):
                if any(word in prompt_lower for word in suspicious_words):
                    return True
        return False

    def is_content_suspicious(self, prompt: str) -> Tuple[bool, str]:
        """Run every check in order and report the first that fires.

        Returns:
            ``(True, message)`` naming the check that fired, or
            ``(False, "Content appears safe")`` when none did.
        """
        checks = [
            (self.check_pattern_matching, "Pattern matching detected suspicious content"),
            (self.check_word_proximity, "Suspicious word proximity detected"),
            (self.check_suspicious_phrases, "Suspicious phrases detected"),
            (self.check_evasion_patterns, "Potential evasion patterns detected"),
            (self.check_numerical_age_indicators, "Suspicious age indicators with sexual content"),
            (self.check_contextual_red_flags, "Contextual red flags detected"),
            (self.simple_age_check, "Simple age check detected suspicious content"),
            (self.check_numerical_age_context, "Numerical age context check detected suspicious content"),
            (self.check_age_sexual_combinations, "Age-sexual combination check detected suspicious content"),
        ]

        for check_func, message in checks:
            try:
                if check_func(prompt):
                    return True, message
            except Exception as e:
                # NOTE(review): a crashing check is logged and skipped, i.e.
                # this layer fails open for that one check. Kept as the
                # original best-effort behaviour; confirm that is intended.
                print(f"Warning: Safety check {check_func.__name__} failed: {e}")
                continue

        return False, "Content appears safe"
257
+
258
+ # Enhanced safety function
259
def comprehensive_safety_check(prompt: str) -> Tuple[bool, str]:
    """Run the keyword checker, then a few structural fallback checks.

    Args:
        prompt: The user-supplied text to screen.

    Returns:
        ``(blocked, reason)``. ``blocked`` is True when the keyword checker
        or any fallback check fires, when *prompt* is not a string, or when
        the checker itself raises (the function fails closed).
    """
    try:
        # A non-string would make every individual check raise and be
        # skipped, which previously ended in "All safety checks passed".
        if not isinstance(prompt, str):
            return True, "Safety system error - content blocked for caution"

        # The checker keeps no per-prompt state; a new instance per call is
        # simply the existing construction pattern.
        safety_checker = ContentSafetyChecker()

        # Primary check
        is_suspicious, message = safety_checker.is_content_suspicious(prompt)
        if is_suspicious:
            return True, message

        # Structural fallbacks, numbered from 1 in the returned reason.
        fallback_checks = (
            lambda p: len(p) > 1000,  # Unusually long prompts (potential obfuscation)
            lambda p: p.count('"') > 20,  # Excessive quotes (potential code injection)
            lambda p: '||' in p or '&&' in p,  # Shell command operators
            # Basic markup injection; compared case-insensitively so
            # "<SCRIPT" is treated like "<script".
            lambda p: any(
                marker in p.lower()
                for marker in ('<script', 'javascript:', 'onload=')
            ),
        )

        for number, check in enumerate(fallback_checks, start=1):
            if check(prompt):
                return True, f"Fallback safety check #{number} triggered"

        return False, "All safety checks passed"

    except Exception as e:
        print(f"Safety check failed, erring on caution: {e}")
        return True, "Safety system error - content blocked for caution"
290
+
291
+ # Usage in your application
292
def test_prompt(original_prompt: str) -> str:
    """Screen a prompt and return either it (tagged as passed) or a safe stand-in.

    Args:
        original_prompt: The text to screen.

    Returns:
        ``"Regex safety check passed: <prompt>"`` when nothing fired,
        otherwise a fixed neutral replacement prompt. Blocked prompts are
        also printed (first 100 characters) and appended to
        ``safety_blocks.log`` in the current working directory.
    """
    is_blocked, reason = comprehensive_safety_check(original_prompt)

    if not is_blocked:
        # If safe, proceed with normal enhancement
        return f"Regex safety check passed: {original_prompt}"

    print(f"🚨 SAFETY BLOCK: {reason} - Prompt: {original_prompt[:100]}...")

    # Log the blocked content (for monitoring). The prompt is written with
    # repr() so embedded newlines cannot forge extra log lines, and the file
    # is opened as UTF-8 so non-ASCII prompts do not depend on the platform
    # default encoding.
    try:
        with open("safety_blocks.log", "a", encoding="utf-8") as log_file:
            log_file.write(f"{reason}: {original_prompt!r}\n")
    except (OSError, UnicodeError):
        # Logging is best-effort; a failed write must not unblock the prompt.
        print("Warning: Could not write to safety log")

    return "A professional and appropriate image editing task"  # Safe fallback
307
+
308
+ import gradio as gr
309
+
310
# Gradio test page: one prompt box, a result box, and Check / Clear buttons.
with gr.Blocks(title="Content Safety Checker | Testing Zone") as demo:
    gr.Markdown("""
# 🛡️ Regex-based Content Safety Checker
Some of you guys need to seek help...
""")

    with gr.Row():
        with gr.Column(scale=1):
            input_prompt = gr.Textbox(
                label="Test Prompt",
                placeholder="Type a prompt to check for safety...",
                lines=5,
                max_lines=10,
            )
            check_button = gr.Button("Check Safety", variant="primary")
            clear_button = gr.Button("Clear", variant="secondary")

        with gr.Column(scale=2):
            output_result = gr.Textbox(
                label="Safety Check Result",
                interactive=False,
                lines=10,
                max_lines=15,
            )
            safety_info = gr.HTML(
                value="<div style='padding:15px; margin-top:15px; background: #f8f9fa; border-radius: 8px;'>"
                      "<h4>ℹ️ How it works:</h4>"
                      "<ul>"
                      "<li>Multi-layer safety checking system</li>"
                      "<li>Detects age-related terms combined with sexual content</li>"
                      "<li>Identifies potential CSAM/illegal content patterns</li>"
                      "<li>Checks for evasion techniques and suspicious combinations</li>"
                      "</ul>"
                      "</div>"
            )

    def process_prompt(prompt):
        """Return the verdict text shown in the result box for *prompt*."""
        if not prompt or not prompt.strip():
            return "Please enter a prompt to test."

        is_blocked, reason = comprehensive_safety_check(prompt)
        if is_blocked:
            return f"🚨 BLOCKED: {reason}\n\nOriginal prompt: {prompt}"
        return f"✅ SAFE: {reason}\n\nOriginal prompt: {prompt}"

    def clear_inputs():
        """Return empty values for the prompt box and the result box."""
        return "", ""

    # The button and pressing Enter in the textbox run the same handler.
    check_button.click(
        fn=process_prompt,
        inputs=input_prompt,
        outputs=output_result,
    )

    input_prompt.submit(
        fn=process_prompt,
        inputs=input_prompt,
        outputs=output_result,
    )

    clear_button.click(
        fn=clear_inputs,
        inputs=None,
        outputs=[input_prompt, output_result],
    )

# Start the server only when executed as a script, so importing this module
# (tests, reload tooling) does not launch a web server as a side effect.
if __name__ == "__main__":
    demo.launch()