# Source: Hugging Face Space "Veronyka/algospeak" (app file).
# Space status when captured: Sleeping.
# File size: 11,463 bytes.

import gradio as gr
from huggingface_hub import InferenceClient
from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT
import os
import re

# Inference client for the Hugging Face API. The token is read from the
# environment: HF_TOKEN is tried first, then HUGGINGFACE_TOKEN. If neither is
# set, the client is created with token=None.
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
client = InferenceClient(token=hf_token)

# Prompt context built once at import time by the project dictionary module.
# NOTE(review): not referenced anywhere else in the visible part of this file.
ALGOSPEAK_EXAMPLES = get_algospeak_context()

# Model id passed to client.text_generation() by the AI-backed functions.
# NOTE(review): the author's claim that these GPT-2 variants are reliably served
# by the HF Inference API is not verified here — confirm before relying on it.
MODEL = "gpt2-medium"  # primary text-generation model
# NOTE(review): BACKUP_MODEL is defined but never used in the visible code.
BACKUP_MODEL = "distilgpt2"  # intended smaller fallback

# Plain-language term -> AlgoSpeak replacement used by the rule-based translator.
_PLAIN_TO_ALGOSPEAK = {
    # Death and violence
    "suicide": "sewerslide",
    "kill": "unalive",
    "killing": "unaliving",
    "killed": "unalived",
    "die": "unalive",
    "died": "unalived",
    "death": "unalive",
    "dead": "unalived",

    # Weapons and war
    "gun": "pew pew",
    "guns": "pew pews",
    "shooting": "pew pew",
    "shot": "pew pew",
    "weapon": "noodle",
    "weapons": "noodles",
    "war": "cornucopia",
    "bomb": "kaboom",

    # Adult content
    "sex": "seggs",
    "sexual": "seggs",
    "porn": "corn",
    "pornography": "corn",
    "lesbian": "le$bian",
    "gay": "g@y",

    # Health
    "COVID": "mascara",
    "COVID-19": "mascara",
    "coronavirus": "mascara",
    "vaccine": "backshot",
    "pandemic": "panini",

    # LGBTQ+
    "LGBTQ": "leg booty",
    "LGBTQ+": "leg booty",

    # Sexual violence
    "rape": "grape",
    "sexual assault": "SA",

    # Other
    "sex worker": "accountant",
    "stripper": "skripper",
    "marijuana": "lettuce",
    "weed": "lettuce",
}

# Longest terms first so multi-word entries ("sex worker", "sexual assault")
# win over the shorter terms they contain ("sex").
_PLAIN_TERMS_BY_LENGTH = sorted(_PLAIN_TO_ALGOSPEAK, key=len, reverse=True)

# A single alternation with one capturing group per term, so Match.lastindex
# tells us which term matched without case-normalising the matched text.
#   (?<!\w)        - the term must start a word ("war" must not hit "award")
#   (?=s?(?!\w))   - the term must end a word, optionally followed by a plural
#                    "s" that is left in place ("deaths" -> "unalives")
_PLAIN_TERM_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(f"({re.escape(term)})" for term in _PLAIN_TERMS_BY_LENGTH)
    + r")(?=s?(?!\w))",
    re.IGNORECASE,
)


def translate_to_algospeak_simple(text):
    """Translate plain text to AlgoSpeak using the built-in term table.

    The text is scanned once, case-insensitively. Each whole-word occurrence
    of a known term (optionally followed by a plural "s") is replaced by its
    AlgoSpeak equivalent. Because the scan is a single pass, text produced by
    one replacement is never re-translated by another (e.g. "vaccine" becomes
    "backshot" and stays that way, even though "shot" is also a known term).

    Args:
        text: The plain-language text to translate.

    Returns:
        A user-facing message. It starts with "🔄 Translated to AlgoSpeak:"
        followed by the translated text when at least one term was replaced,
        otherwise with "✨ No sensitive terms detected!" followed by the
        original text and a usage tip.
    """
    def _encode(match):
        # lastindex is 1-based and maps straight onto the ordered term list.
        return _PLAIN_TO_ALGOSPEAK[_PLAIN_TERMS_BY_LENGTH[match.lastindex - 1]]

    result, replaced = _PLAIN_TERM_PATTERN.subn(_encode, text)

    if replaced == 0:
        return f"✨ No sensitive terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."

    return f"🔄 Translated to AlgoSpeak:\n\n{result}"

def translate_to_algospeak_ai(text):
    """Ask the text-generation model to rewrite *text* in AlgoSpeak.

    A few-shot prompt is sent through the module-level inference client. The
    generated text is stripped of surrounding whitespace and of one pair of
    enclosing double quotes. If anything in that path raises, the rule-based
    translator's output is returned instead, prefixed with a warning line.
    """
    prompt = f"""Task: Convert text to AlgoSpeak (coded language to avoid censorship).

Examples:
"someone died" → "someone unalived"
"talking about sex" → "talking about seggs"
"the war continues" → "the cornucopia continues"
"COVID vaccine" → "mascara backshot"

Now convert: {text}

AlgoSpeak:"""

    try:
        generation_options = {
            "model": MODEL,
            "max_new_tokens": 100,
            "temperature": 0.7,
            "do_sample": True,
            "top_p": 0.9,
        }
        generated = client.text_generation(prompt, **generation_options)

        # Tidy the raw completion: trim whitespace, then drop wrapping quotes.
        cleaned = generated.strip()
        is_quoted = cleaned.startswith('"') and cleaned.endswith('"')
        if is_quoted:
            cleaned = cleaned[1:-1]
        return f"🤖 AI Translation:\n\n{cleaned}"
    except Exception:
        # Any failure (network, model, unexpected response) degrades to the
        # dictionary-based translation.
        return f"⚠️ AI model unavailable, using dictionary:\n\n{translate_to_algospeak_simple(text)}"

def translate_to_algospeak(text):
    """Translate plain text to AlgoSpeak, combining dictionary and AI output.

    The dictionary translation runs first. The AI translation is requested
    only when the dictionary actually replaced something.

    Args:
        text: Plain-language text from the UI; may be empty or None.

    Returns:
        A user-facing message:
        - a prompt to enter text when *text* is empty or whitespace;
        - the dictionary "no terms" message when nothing was replaced;
        - the AI fallback message alone when the AI call failed (it already
          embeds the dictionary translation, so it is not appended again);
        - otherwise the AI translation followed by the dictionary version.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to translate."

    dict_result = translate_to_algospeak_simple(text)

    # Inspect only the status line (first line) of the helper's message, so
    # user text that happens to contain the marker phrase cannot be mistaken
    # for a "nothing found" result.
    if "No sensitive terms detected" in dict_result.partition("\n")[0]:
        return dict_result

    ai_result = translate_to_algospeak_ai(text)

    # On failure the AI helper returns the dictionary translation itself;
    # appending dict_result again would show the same text twice.
    if "AI model unavailable" in ai_result.partition("\n")[0]:
        return ai_result

    return f"{ai_result}\n\n---\n\n📖 Dictionary version:\n{dict_result}"

def _algospeak_term_regex(term):
    """Return regex source that matches *term* as a whole word."""
    # Guard an edge with a word-boundary check only when that edge is itself a
    # word character; terms that begin or end with a symbol are left open there.
    lead = r"(?<!\w)" if re.match(r"\w", term[0]) else ""
    # Allow (but do not consume) a plural "s" so "pew pews" still matches
    # "pew pew" and renders as "[meaning]s".
    trail = r"(?=s?(?!\w))" if re.match(r"\w", term[-1]) else ""
    return f"{lead}{re.escape(term)}{trail}"


def interpret_algospeak_simple(text):
    """Replace known AlgoSpeak terms in *text* with their bracketed meanings.

    The text is scanned once, case-insensitively, against every term in
    ALGOSPEAK_DICT. Longer terms take precedence over shorter ones they
    contain, terms are matched as whole words rather than as substrings of
    unrelated words, and text inserted by one replacement is never rescanned
    by another.

    Assumes ALGOSPEAK_DICT maps string terms to string meanings — TODO confirm
    against algospeak_dictionary.

    Args:
        text: The AlgoSpeak text to interpret.

    Returns:
        A user-facing message. It starts with "🔍 Interpreted:" followed by
        the rewritten text and a list of the terms found (in dictionary
        order), or with "✨ No AlgoSpeak terms detected!" followed by the
        original text and a usage tip.
    """
    # Skip empty keys: an empty pattern would match at every position.
    entries = [(term, meaning) for term, meaning in ALGOSPEAK_DICT.items() if term]
    by_length = sorted(entries, key=lambda entry: len(entry[0]), reverse=True)

    matched_terms = set()
    result = text

    if by_length:
        # One capturing group per term; Match.lastindex identifies the term.
        pattern = re.compile(
            "|".join(f"({_algospeak_term_regex(term)})" for term, _ in by_length),
            re.IGNORECASE,
        )

        def _decode(match):
            term, meaning = by_length[match.lastindex - 1]
            matched_terms.add(term)
            return f"[{meaning}]"

        result = pattern.sub(_decode, text)

    if not matched_terms:
        return f"✨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."

    explanation = "\n".join(
        f"'{term}' → {meaning}" for term, meaning in entries if term in matched_terms
    )
    return f"🔍 Interpreted:\n\n{result}\n\n📖 Terms found:\n{explanation}"

def interpret_algospeak_ai(text):
    """Ask the text-generation model to rewrite AlgoSpeak *text* in plain English.

    A few-shot prompt is sent through the module-level inference client. The
    generated text is stripped of surrounding whitespace and of one pair of
    enclosing double quotes. If anything in that path raises, the rule-based
    interpreter's output is returned instead, prefixed with a warning line.
    """
    prompt = f"""Task: Translate AlgoSpeak (coded language) to plain English.

Examples:
"someone unalived" → "someone died/killed themselves"
"talking about seggs" → "talking about sex"
"the cornucopia continues" → "the war continues"
"got my backshot for mascara" → "got my vaccine for COVID"

Now translate: {text}

Plain English:"""

    try:
        generation_options = {
            "model": MODEL,
            "max_new_tokens": 100,
            "temperature": 0.5,
            "do_sample": True,
            "top_p": 0.9,
        }
        generated = client.text_generation(prompt, **generation_options)

        # Tidy the raw completion: trim whitespace, then drop wrapping quotes.
        cleaned = generated.strip()
        is_quoted = cleaned.startswith('"') and cleaned.endswith('"')
        if is_quoted:
            cleaned = cleaned[1:-1]
        return f"🤖 AI Interpretation:\n\n{cleaned}"
    except Exception:
        # Any failure degrades to the dictionary-based interpretation.
        return f"⚠️ AI model unavailable, using dictionary:\n\n{interpret_algospeak_simple(text)}"

def interpret_algospeak(text):
    """Interpret AlgoSpeak as plain language, combining dictionary and AI output.

    The dictionary interpretation runs first. The AI interpretation is
    requested only when the dictionary recognised at least one term.

    Args:
        text: AlgoSpeak text from the UI; may be empty or None.

    Returns:
        A user-facing message:
        - a prompt to enter text when *text* is empty or whitespace;
        - the dictionary "no terms" message when nothing was recognised;
        - the AI fallback message alone when the AI call failed (it already
          embeds the dictionary interpretation, so it is not appended again);
        - otherwise the AI interpretation followed by the dictionary result.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some AlgoSpeak text to interpret."

    dict_result = interpret_algospeak_simple(text)

    # Inspect only the status line (first line) of the helper's message, so
    # user text that happens to contain the marker phrase cannot be mistaken
    # for a "nothing found" result.
    if "No AlgoSpeak terms detected" in dict_result.partition("\n")[0]:
        return dict_result

    ai_result = interpret_algospeak_ai(text)

    # On failure the AI helper returns the dictionary interpretation itself;
    # appending dict_result again would show the same text twice.
    if "AI model unavailable" in ai_result.partition("\n")[0]:
        return ai_result

    return f"{ai_result}\n\n---\n\n{dict_result}"

def search_dictionary(query):
    """Return dictionary entries whose term or meaning contains *query*.

    Matching is a case-insensitive substring test on the query after it has
    been lowercased and stripped. Hits are rendered as Markdown lines of the
    form ``**term** → meaning`` separated by blank lines; with no hits a
    "no terms found" message is returned. An empty query is a substring of
    every string, so it matches every entry.
    """
    needle = query.lower().strip()

    hits = [
        f"**{term}** → {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if needle in term.lower() or needle in meaning.lower()
    ]

    if hits:
        return "\n\n".join(hits)
    return "❌ No terms found. Try another search!"

# Gradio interface: a three-tab app (translate, interpret, dictionary search)
# bound to the module-level name `demo`.
with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
    # Page header.
    # NOTE(review): this f-string has no placeholders, and the "60+" term count
    # is hard-coded rather than derived from ALGOSPEAK_DICT.
    gr.Markdown(f"""
    # 🗣️ AlgoSpeak AI Translator
    
    **AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.
    
    This tool can:
    - 📝 Translate plain text → AlgoSpeak (AI + dictionary hybrid)
    - 🔍 Interpret AlgoSpeak → plain language (AI-enhanced)
    - 📖 Search through 60+ catalogued terms
    
    🤖 **Powered by:** GPT-2 AI model + curated dictionary
    💡 **Hybrid approach:** AI for context + dictionary for accuracy
    """)

    # Tab 1: plain text -> AlgoSpeak. Input and button on the left, result on
    # the right.
    with gr.Tab("🌐 Translate to AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_normal = gr.Textbox(
                    label="Plain Text",
                    placeholder="Type something...",
                    lines=3
                )
                btn_translate = gr.Button("Translate to AlgoSpeak", variant="primary")
            with gr.Column():
                output_algospeak = gr.Textbox(
                    label="AlgoSpeak Result",
                    lines=3
                )

        # Sample sentences that fill the input box.
        gr.Examples(
            examples=[
                ["Let's talk about mental health and teenage suicide"],
                ["The war in the region had many shootings and civilian deaths"],
                ["We need to discuss LGBTQ+ rights and responsible adult content"],
                ["COVID-19 caused millions of deaths in the pandemic"],
                ["Sex workers deserve rights and protection"]
            ],
            inputs=input_normal
        )

        # Button click runs the hybrid translator on the input text.
        btn_translate.click(
            translate_to_algospeak,
            inputs=input_normal,
            outputs=output_algospeak
        )

    # Tab 2: AlgoSpeak -> plain language, same two-column layout.
    with gr.Tab("🔍 Interpret AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_algospeak = gr.Textbox(
                    label="AlgoSpeak Text",
                    placeholder="Paste AlgoSpeak text...",
                    lines=3
                )
                btn_interpret = gr.Button("Interpret", variant="primary")
            with gr.Column():
                output_normal = gr.Textbox(
                    label="Plain Language",
                    lines=3
                )

        # Sample AlgoSpeak sentences that fill the input box.
        gr.Examples(
            examples=[
                ["Someone tried to unalive themselves"],
                ["The seggs worker talked about their job"],
                ["There was a cornucopia with many pew pews"],
                ["Got my backshot today, feeling safer from the mascara"],
                ["The accountant shared spicy content on the corn site"]
            ],
            inputs=input_algospeak
        )

        # Button click runs the hybrid interpreter on the input text.
        btn_interpret.click(
            interpret_algospeak,
            inputs=input_algospeak,
            outputs=output_normal
        )

    # Tab 3: substring search over the dictionary; results render as Markdown.
    with gr.Tab("📖 Dictionary Search"):
        gr.Markdown("""
        ### Explore the AlgoSpeak dictionary
        Search by coded term OR by real meaning.
        """)

        with gr.Row():
            with gr.Column():
                search_input = gr.Textbox(
                    label="Type your search",
                    placeholder="e.g. unalive, sex, weapon...",
                    lines=1
                )
                btn_search = gr.Button("🔍 Search", variant="primary")
            with gr.Column():
                # Initial content shows the live entry count from ALGOSPEAK_DICT.
                search_output = gr.Markdown(
                    label="Results",
                    value=f"💡 Tip: Type any word to search!\n\n**Total terms in dictionary:** {len(ALGOSPEAK_DICT)}"
                )

        # Sample queries that fill the search box.
        gr.Examples(
            examples=[
                ["unalive"],
                ["sex"],
                ["weapon"],
                ["COVID"],
                ["LGBTQ"]
            ],
            inputs=search_input
        )

        # Button click runs the dictionary search on the query.
        btn_search.click(
            search_dictionary,
            inputs=search_input,
            outputs=search_output
        )

    # Page footer.
    gr.Markdown("""
    ---
    ### ℹ️ About AlgoSpeak
    AlgoSpeak is a form of linguistic resistance against algorithmic censorship.
    Learn more at [algospeak.net](https://www.algospeak.net/)
    
    **Note**: This is an educational prototype developed during a workshop.
    """)

# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()