Spaces:

LPX55
/

csam_patterns

Sleeping

App Files Files Community

LPX55 committed on Sep 8, 2025

Commit

840b896

verified ·

1 Parent(s): 7c46ab5

Create app.py

Browse files

Files changed (1) hide show

app.py +377 -0

app.py ADDED Viewed

	@@ -0,0 +1,377 @@

+import re
+from typing import List, Tuple
+import gradio as gr
class ContentSafetyChecker:
    """Keyword/regex screen that flags prompts pairing minors with sexual content.

    Every ``check_*`` method takes the raw prompt and returns ``True`` when its
    heuristic fires.  ``is_content_suspicious`` runs them in a fixed order and
    reports the first hit.  The checks are plain substring/regex heuristics, so
    they are deliberately broad and can flag benign text.

    Attributes:
        csam_patterns: compiled, case-insensitive regexes used by
            ``check_pattern_matching``.
        suspicious_combinations: ``(age_terms, sexual_terms, max_distance)``
            tuples used by ``check_word_proximity``.
    """

    # Inclusive range of numbers the numeric checks treat as a minor's age.
    _MINOR_AGE_MIN = 3
    _MINOR_AGE_MAX = 17

    # Stand-alone runs of digits ("12" but not the "12" inside "12yo").
    _NUMBER_RE = re.compile(r'\b\d+\b')
    # Punctuation glued to either end of a whitespace-delimited token.
    _EDGE_PUNCT_RE = re.compile(r'^\W+|\W+$')

    # Each numeric check historically carries its own term list; they are kept
    # separate so every check flags exactly the vocabulary it always has.
    _AGE_CONTEXT_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit',
        'undress', 'penis', 'vagina', 'pussy', 'cock',
    )
    _SIMPLE_AGE_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'explicit',
        'penis', 'vagina', 'pussy', 'cock',
    )
    # NOTE(review): terms are matched as substrings, so 'off' also hits
    # "office"/"coffee" and 'cock' hits "cocktail" - confirm this is intended.
    _AGE_INDICATOR_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'nudify', 'off', 'penis',
        'cock', 'pussy', 'vagina', 'insert',
    )
    _COMBINATION_TERMS = (
        'nude', 'naked', 'sex', 'porn', 'erotic', 'explicit', 'sexual',
        'penis', 'vagina', 'pussy', 'cock',
    )

    # "under 18", "below 16", "less than 12", "12 and under".
    _AGE_LIMIT_RES = tuple(re.compile(p) for p in (
        r'\bunder\s*(1[0-8]|[0-9])\b',
        r'\bbelow\s*(1[0-8]|[0-9])\b',
        r'\bless than\s*(1[0-8]|[0-9])\b',
        r'\b[0-9]{1,2}\s*and under\b',
    ))

    # An age word next to a 1-2 digit number, in either order.
    _AGE_WITH_NUMBER_RES = tuple(re.compile(p) for p in (
        r'(child|kid|baby|toddler|infant|minor|youngster|teen|youth)\s*(?:of|is|was)?\s*(\d{1,2})\s*(years?\s*old)?',
        r'(\d{1,2})\s*(years?\s*old|yo|y/o)?\s*(child|kid|baby|toddler|infant|minor|young)',
    ))

    # NOTE(review): these patterns are unanchored, so e.g. ``n\w*d\w*`` also
    # matches the "nd" inside ordinary words such as "and" or "landscape".
    # Combined with the substring age indicators below, this check fires on
    # many benign prompts.  Left unchanged because tightening it lowers recall.
    _EVASION_RES = tuple(re.compile(p, re.IGNORECASE) for p in (
        r'p\w*is',
        r'p\w*sy',
        r's\w*x',
        r'n\w*d\w*',
    ))
    _EVASION_AGE_INDICATORS = ('old', 'teen', 'baby', 'kid', 'child', 'minor', 'young')

    def __init__(self):
        # Pre-compile regex patterns for performance
        self.csam_patterns = self._compile_csam_patterns()
        self.suspicious_combinations = self._compile_suspicious_combinations()

    def _compile_csam_patterns(self) -> List[re.Pattern]:
        """Compile CSAM detection patterns (all case-insensitive)."""
        patterns = [
            # Direct CSAM references
            r'\b(csam|child.*sexual|minor.*abuse|illegal.*content)',
            # Age + sexual content combinations
            r'\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen).*\b(sex|nude|naked|pussy|penis|vagina)',
            r'\b(sex|nude|naked|porn|erotic|seductive|lust|breast|genital|penis|vagina).*\b(child|kid|baby|toddler|infant|minor|youngster|youth|teen)',
            # Numerical age checks with sexual content
            r'\b(?:age|aged?|years? old)\s*[=\-:]\s*(?:[0-9]|1[0-7])\b.*\b(sex|nude|naked|porn|erotic)',
            r'\b([0-9]|1[0-7])\s*(?:years? old|yo|y/o)\b.*\b(sex|nude|naked|porn|erotic)',
            # Numbers 3-17 directly followed by sexual content terms
            r'\b([3-9]|1[0-7])\s*[,\-]?\s*(sex|nude|naked|porn|erotic|seductive|explicit)',
            # Descriptive numerical age checks
            r'\b(under\s*(1[0-8]|[0-9])|below\s*(1[0-8]|[0-9])|less than\s*(1[0-8]|[0-9]))\s*years?\s*[,\-]?\s*(sex|nude|naked|porn|erotic)',
            # Inappropriate scenarios with numerical ages
            r'\b(pedophil|lolita|underage.*sex|minor.*sex|illegal.*sex)',
            # Grooming language
            r'\b(inappropriate.*adult.*child|adult.*minor.*relationship|groom)',
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def _compile_suspicious_combinations(self) -> List[Tuple[List[str], List[str], int]]:
        """Return ``(age_terms, sexual_terms, max_word_distance)`` tuples."""
        return [
            # Age indicators + sexual content
            (['4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'young teen', 'minor', 'child', 'kid', 'baby'],
             ['nude', 'naked', 'sex', 'porn', 'erotic', 'seductive', 'penis', 'vagina', 'penetration', 'cock', 'pussy'], 8),
            (['adult'],
             ['pedo', 'loli', 'secret', 'rape'], 5),
        ]

    # ------------------------------------------------------------------ helpers

    @classmethod
    def _minor_ages(cls, text: str) -> List[int]:
        """Return every stand-alone number in *text* inside the minor-age range."""
        ages = []
        for digits in cls._NUMBER_RE.findall(text):
            try:
                value = int(digits)
            except ValueError:
                # int() refuses absurdly long digit runs on recent Pythons;
                # such a token is not an age, so skip it.
                continue
            if cls._MINOR_AGE_MIN <= value <= cls._MINOR_AGE_MAX:
                ages.append(value)
        return ages

    @staticmethod
    def _contains_any(text: str, terms) -> bool:
        """Return True when any of *terms* occurs in *text* as a substring."""
        return any(term in text for term in terms)

    @staticmethod
    def _term_positions(tokens: List[str], terms) -> List[int]:
        """Return the start index of every occurrence of every term in *tokens*.

        A term may span several words ("young teen"); it then has to match a
        run of consecutive tokens.
        """
        positions = []
        for term in terms:
            parts = term.split()
            width = len(parts)
            if width == 0:
                continue
            positions.extend(
                start
                for start in range(len(tokens) - width + 1)
                if tokens[start:start + width] == parts
            )
        return positions

    # ------------------------------------------------------------------- checks

    def check_pattern_matching(self, prompt: str) -> bool:
        """First line of defense: return True if any compiled pattern matches."""
        for pattern in self.csam_patterns:
            if pattern.search(prompt):
                print(f"🚨 Pattern matched: {pattern.pattern}")
                return True
        return False

    def check_numerical_age_context(self, prompt: str) -> bool:
        """Flag a minor-range number, or an "under N" phrase, with a sexual term.

        Two rules are applied to the lower-cased prompt:

        1. any stand-alone number in the minor-age range together with a
           sexual term anywhere in the prompt;
        2. an age-limit phrase ("under 18", "12 and under", ...) followed,
           anywhere later in the prompt, by a sexual term.
        """
        prompt_lower = prompt.lower()

        minor_ages = self._minor_ages(prompt_lower)
        if minor_ages and self._contains_any(prompt_lower, self._AGE_CONTEXT_TERMS):
            print(f"🚨 Suspicious age with sexual content: {minor_ages[0]}")
            return True

        for phrase_re in self._AGE_LIMIT_RES:
            match = phrase_re.search(prompt_lower)
            if match is None:
                continue
            # Only text *after* the phrase counts for this rule.
            if self._contains_any(prompt_lower[match.end():], self._AGE_CONTEXT_TERMS):
                print(f"🚨 Age limit phrase with sexual content: {match.group()}")
                return True
        return False

    def check_age_sexual_combinations(self, prompt: str) -> bool:
        """Flag "<age word> <number>" pairs with a sexual term within 50 chars."""
        prompt_lower = prompt.lower()
        for age_re in self._AGE_WITH_NUMBER_RES:
            for match in age_re.finditer(prompt_lower):
                # The number is whichever captured group consists of digits.
                age = next(
                    (int(group) for group in match.groups() if group and group.isdigit()),
                    None,
                )
                if age is None or not self._MINOR_AGE_MIN <= age <= self._MINOR_AGE_MAX:
                    continue
                # Look for sexual content nearby (within 50 characters either side).
                window = prompt_lower[max(0, match.start() - 50):match.end() + 50]
                if self._contains_any(window, self._COMBINATION_TERMS):
                    print(f"🚨 Age-sexual content combination detected: age {age}")
                    return True
        return False

    def check_word_proximity(self, prompt: str) -> bool:
        """Second line: flag an age term and a sexual term that sit close together.

        The prompt is lower-cased and split on whitespace; punctuation attached
        to a token is ignored so "word," still counts as "word".  For each entry
        of ``self.suspicious_combinations`` every occurrence of every term is
        considered (not just the first one), and the check fires when any
        age/sexual pair is at most ``max_distance`` tokens apart.
        """
        tokens = [self._EDGE_PUNCT_RE.sub('', raw) for raw in prompt.lower().split()]
        if not tokens:
            return False
        for age_words, sexual_words, distance_threshold in self.suspicious_combinations:
            age_positions = self._term_positions(tokens, age_words)
            if not age_positions:
                continue
            sexual_positions = self._term_positions(tokens, sexual_words)
            if any(
                abs(age_idx - sexual_idx) <= distance_threshold
                for age_idx in age_positions
                for sexual_idx in sexual_positions
            ):
                return True
        return False

    def simple_age_check(self, prompt: str) -> bool:
        """Flag any minor-range number alongside a sexual term anywhere in the prompt."""
        return bool(self._minor_ages(prompt)) and self._contains_any(
            prompt.lower(), self._SIMPLE_AGE_TERMS
        )

    def check_suspicious_phrases(self, prompt: str) -> bool:
        """Third line: flag fixed phrases that appear verbatim (case-insensitive)."""
        suspicious_phrases = [
            'young model', 'teen photoshoot', 'minor content',
            'child art', 'kids only', 'no adults allowed',
            'private session', 'secret photos', 'hidden content'
        ]
        return self._contains_any(prompt.lower(), suspicious_phrases)

    def check_evasion_patterns(self, prompt: str) -> bool:
        """Fourth line: flag obfuscated-looking sexual terms next to an age indicator.

        Fires when the prompt contains any age-indicator substring and any of
        the loose evasion regexes matches.
        """
        # The age test does not depend on which pattern matched, so do it once.
        if not self._contains_any(prompt.lower(), self._EVASION_AGE_INDICATORS):
            return False
        return any(pattern.search(prompt) for pattern in self._EVASION_RES)

    def check_numerical_age_indicators(self, prompt: str) -> bool:
        """Fifth line: flag a minor-range number alongside a sexual indicator term."""
        return bool(self._minor_ages(prompt)) and self._contains_any(
            prompt.lower(), self._AGE_INDICATOR_TERMS
        )

    def check_contextual_red_flags(self, prompt: str) -> bool:
        """Sixth line: flag a context regex that co-occurs with a listed word."""
        red_flag_contexts = [
            # Suspicious session types
            (r'(private|secret|hidden|exclusive).*photo', ['nude', 'naked', 'photos']),
            # Suspicious content descriptions
            (r'(special|unique|exclusive|view|angle|pov|shot).*content', ['young', 'minor', 'teen'])
        ]
        prompt_lower = prompt.lower()
        return any(
            re.search(context_pattern, prompt_lower)
            and self._contains_any(prompt_lower, suspicious_words)
            for context_pattern, suspicious_words in red_flag_contexts
        )

    def is_content_suspicious(self, prompt: str) -> Tuple[bool, str]:
        """Run every check in order and report the first one that fires.

        Returns:
            ``(True, message)`` for the first check that returns True, otherwise
            ``(False, "Content appears safe")``.  A check that raises is logged
            to stdout and skipped.
        """
        checks = [
            (self.check_pattern_matching, "Pattern matching detected suspicious content"),
            (self.check_word_proximity, "Suspicious word proximity detected"),
            (self.check_suspicious_phrases, "Suspicious phrases detected"),
            (self.check_evasion_patterns, "Potential evasion patterns detected"),
            (self.check_numerical_age_indicators, "Suspicious age indicators with sexual content"),
            (self.check_contextual_red_flags, "Contextual red flags detected"),
            (self.simple_age_check, "Simple age check detected suspicious content"),
            (self.check_numerical_age_context, "Numerical age context check detected suspicious content"),
            (self.check_age_sexual_combinations, "Age-sexual combination check detected suspicious content"),
        ]
        for check_func, message in checks:
            try:
                if check_func(prompt):
                    return True, message
            except Exception as e:
                print(f"Warning: Safety check {check_func.__name__} failed: {e}")
                continue
        return False, "Content appears safe"
+# Enhanced safety function
def comprehensive_safety_check(prompt: str) -> Tuple[bool, str]:
    """Run the keyword checker, then a few generic abuse heuristics.

    Args:
        prompt: user-supplied text to screen.

    Returns:
        ``(blocked, reason)``.  ``blocked`` is True when the keyword checker
        flags the prompt, when one of the fallback heuristics fires, or when
        any part of the screening raises - the function fails closed.
    """
    try:
        # Initialize the safety checker for each check (ensures fresh state)
        safety_checker = ContentSafetyChecker()

        # Primary check
        is_suspicious, message = safety_checker.is_content_suspicious(prompt)
        if is_suspicious:
            return True, message

        # Generic heuristics applied to prompts the primary check let through.
        fallback_checks = [
            lambda p: len(p) > 1000,  # Unusually long prompts (potential obfuscation)
            lambda p: p.count('"') > 20,  # Excessive quotes (potential code injection)
            lambda p: '||' in p or '&&' in p,  # Shell command operators
            lambda p: any(marker in p for marker in ['<script', 'javascript:', 'onload=']),  # Basic XSS
        ]
        for number, check in enumerate(fallback_checks, start=1):
            # No inner try/except: a heuristic that crashes (e.g. on a non-str
            # prompt) must reach the handler below and block the prompt rather
            # than be counted as a pass.
            if check(prompt):
                return True, f"Fallback safety check #{number} triggered"
        return False, "All safety checks passed"
    except Exception as e:
        print(f"Safety check failed, erring on caution: {e}")
        return True, "Safety system error - content blocked for caution"
+# Usage in your application
def test_prompt(original_prompt: str) -> str:
    """Screen a prompt and return either it (tagged as passed) or a safe stand-in.

    A blocked prompt is announced on stdout and appended to
    ``safety_blocks.log`` in the current working directory; failure to write
    the log is reported but never prevents the safe fallback from being
    returned.

    Args:
        original_prompt: user-supplied text to screen.

    Returns:
        ``"Regex safety check passed: <prompt>"`` when the prompt is allowed,
        otherwise a fixed neutral replacement prompt.
    """
    is_blocked, reason = comprehensive_safety_check(original_prompt)
    if not is_blocked:
        # If safe, proceed with normal enhancement
        return f"Regex safety check passed: {original_prompt}"

    print(f"🚨 SAFETY BLOCK: {reason} - Prompt: {original_prompt[:100]}...")
    # Log the blocked content (for monitoring)
    try:
        # Explicit UTF-8 so non-ASCII prompts do not depend on the locale
        # encoding; repr() keeps each record on one line so a prompt containing
        # newlines cannot forge extra log entries.
        with open("safety_blocks.log", "a", encoding="utf-8") as f:
            f.write(f"{reason}: {original_prompt!r}\n")
    except (OSError, UnicodeError):
        print("Warning: Could not write to safety log")
    return "A professional and appropriate image editing task"  # Safe fallback
import gradio as gr

# Gradio test page: a prompt box on the left, the screening verdict on the right.
with gr.Blocks(title="Content Safety Checker | Testing Zone") as demo:
    gr.Markdown("""
    # 🛡️ Regex-based Content Safety Checker
    Some of you guys need to seek help...
    """)
    with gr.Row():
        with gr.Column(scale=1):
            input_prompt = gr.Textbox(
                label="Test Prompt",
                placeholder="Type a prompt to check for safety...",
                lines=5,
                max_lines=10
            )
            check_button = gr.Button("Check Safety", variant="primary")
            clear_button = gr.Button("Clear", variant="secondary")
        with gr.Column(scale=2):
            output_result = gr.Textbox(
                label="Safety Check Result",
                interactive=False,
                lines=10,
                max_lines=15
            )
            safety_info = gr.HTML(
                value="<div style='padding:15px; margin-top:15px; background: #f8f9fa; border-radius: 8px;'>"
                "<h4>ℹ️ How it works:</h4>"
                "<ul>"
                "<li>Multi-layer safety checking system</li>"
                "<li>Detects age-related terms combined with sexual content</li>"
                "<li>Identifies potential CSAM/illegal content patterns</li>"
                "<li>Checks for evasion techniques and suspicious combinations</li>"
                "</ul>"
                "</div>"
            )

    def process_prompt(prompt):
        """Screen *prompt* and format the verdict for the result box."""
        if not prompt or not prompt.strip():
            return "Please enter a prompt to test."
        is_blocked, reason = comprehensive_safety_check(prompt)
        if is_blocked:
            return f"🚨 BLOCKED: {reason}\n\nOriginal prompt: {prompt}"
        else:
            return f"✅ SAFE: {reason}\n\nOriginal prompt: {prompt}"

    def clear_inputs():
        """Return empty values for the prompt box and the result box."""
        return "", ""

    # Both the button and pressing Enter in the textbox run the same check.
    check_button.click(
        fn=process_prompt,
        inputs=input_prompt,
        outputs=output_result
    )
    input_prompt.submit(
        fn=process_prompt,
        inputs=input_prompt,
        outputs=output_result
    )
    clear_button.click(
        fn=clear_inputs,
        inputs=None,
        outputs=[input_prompt, output_result]
    )

# Start the web server only when this file is executed as a script, so the
# checker functions can be imported (e.g. by tests) without launching the UI.
if __name__ == "__main__":
    demo.launch()