algospeak / app.py
travahacker
Add AI model: Hybrid GPT-2 + Dictionary (context-aware translations!)
c300f88
import gradio as gr
from huggingface_hub import InferenceClient
from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT
import os
import re
# Hosted text-generation models, addressed by name through the HF Inference API.
# GPT-2 is older but 100% reliable for text generation.
MODEL = "gpt2-medium"  # Smaller, faster, always works
BACKUP_MODEL = "distilgpt2"  # Even smaller fallback

# The HF token is automatically provided in Spaces; either variable name is
# accepted, with HF_TOKEN taking precedence.
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
client = InferenceClient(token=hf_token)

# Complete AlgoSpeak dictionary context for prompts.
ALGOSPEAK_EXAMPLES = get_algospeak_context()
def translate_to_algospeak_simple(text):
    """Translate plain text to AlgoSpeak using a fixed word list.

    Each sensitive term found in ``text`` is swapped for its coded
    equivalent.  Matching is case-insensitive and anchored on word
    boundaries, so "war" is replaced while "warm" or "reward" are left
    alone.  A trailing plural "s" is tolerated and carried over
    ("deaths" -> "unalives").  All terms are replaced in one pass, so text
    inserted by one replacement is never rewritten by another (previously
    "vaccine" -> "backshot" was then mangled into "backpew pew").

    Args:
        text: The plain-language input string.

    Returns:
        A message containing the translated text, or a "no sensitive terms"
        notice echoing the input when nothing was replaced.
    """
    # Mapping: plain meaning -> algospeak term
    replacements = {
        # Death and violence
        "suicide": "sewerslide",
        "kill": "unalive",
        "killing": "unaliving",
        "killed": "unalived",
        "die": "unalive",
        "died": "unalived",
        "death": "unalive",
        "dead": "unalived",
        # Weapons and war
        "gun": "pew pew",
        "guns": "pew pews",
        "shooting": "pew pew",
        "shot": "pew pew",
        "weapon": "noodle",
        "weapons": "noodles",
        "war": "cornucopia",
        "bomb": "kaboom",
        # Adult content
        "sex": "seggs",
        "sexual": "seggs",
        "porn": "corn",
        "pornography": "corn",
        "lesbian": "le$bian",
        "gay": "g@y",
        # Health
        "COVID": "mascara",
        "COVID-19": "mascara",
        "coronavirus": "mascara",
        "vaccine": "backshot",
        "pandemic": "panini",
        # LGBTQ+
        "LGBTQ": "leg booty",
        "LGBTQ+": "leg booty",
        # Sexual violence
        "rape": "grape",
        "sexual assault": "SA",
        # Other
        "sex worker": "accountant",
        "stripper": "skripper",
        "marijuana": "lettuce",
        "weed": "lettuce",
    }

    # Case-insensitive lookup keyed by the lower-cased plain term.
    lookup = {original.lower(): coded for original, coded in replacements.items()}

    # Longest alternatives first so "sexual assault" wins over "sexual" and
    # "COVID-19" over "COVID".  Look-arounds are used instead of \b because
    # some terms end in a non-word character ("LGBTQ+").
    alternatives = "|".join(
        re.escape(original)
        for original in sorted(replacements, key=len, reverse=True)
    )
    pattern = re.compile(rf"(?<!\w)({alternatives})(s?)(?!\w)", re.IGNORECASE)

    def _swap(match):
        # Re-attach the optional plural "s" after the coded term.
        return lookup[match.group(1).lower()] + match.group(2)

    result, replaced = pattern.subn(_swap, text)

    if replaced == 0:
        return f"✨ No sensitive terms detected!\n\nOriginal: {text}\n\nπŸ’‘ Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."
    return f"πŸ”„ Translated to AlgoSpeak:\n\n{result}"
def translate_to_algospeak_ai(text):
    """Translate plain text to AlgoSpeak with the hosted language model.

    Builds a few-shot prompt, asks ``MODEL`` for a completion through
    ``client.text_generation`` and keeps only the first non-empty line of
    the answer: a plain completion model keeps writing further made-up
    lines after the one that answers the prompt.

    Args:
        text: The plain-language input string.

    Returns:
        A message with the model's translation.  If the request fails or
        the model returns nothing usable, a message wrapping the result of
        ``translate_to_algospeak_simple(text)`` instead.
    """
    prompt = f"""Task: Convert text to AlgoSpeak (coded language to avoid censorship).
Examples:
"someone died" β†’ "someone unalived"
"talking about sex" β†’ "talking about seggs"
"the war continues" β†’ "the cornucopia continues"
"COVID vaccine" β†’ "mascara backshot"
Now convert: {text}
AlgoSpeak:"""
    try:
        response = client.text_generation(
            prompt,
            model=MODEL,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            # Ask for the continuation only, never the echoed prompt.
            return_full_text=False,
        )
        candidates = [line.strip() for line in response.splitlines() if line.strip()]
    except Exception:
        # Deliberate best-effort boundary: any failure of the remote call
        # (network, auth, model loading, unexpected payload) degrades to
        # the dictionary translation rather than surfacing an error.
        candidates = []

    if not candidates:
        # Fallback to dictionary-based translation
        return f"⚠️ AI model unavailable, using dictionary:\n\n{translate_to_algospeak_simple(text)}"

    result = candidates[0]
    # The prompt's examples are quoted, so the model often quotes its answer.
    if len(result) >= 2 and result.startswith('"') and result.endswith('"'):
        result = result[1:-1]
    return f"πŸ€– AI Translation:\n\n{result}"
def translate_to_algospeak(text):
    """Translate plain text to AlgoSpeak, combining dictionary and AI output.

    The dictionary translation runs first.  Only when it replaced at least
    one term is the AI translation requested as well, and both versions
    are returned.  If the AI step fell back to the dictionary, its message
    already contains the dictionary output, so that message is returned
    alone instead of printing the same translation twice.

    Args:
        text: The plain-language input; may be empty or ``None``.

    Returns:
        A user-facing message: a prompt to enter text, the dictionary
        "nothing detected" notice, or the translation(s).
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to translate."

    dict_result = translate_to_algospeak_simple(text)

    # Nothing to translate: the dictionary notice is the whole answer.
    if "No sensitive terms detected" in dict_result:
        return dict_result

    ai_result = translate_to_algospeak_ai(text)

    # The AI fallback message already embeds the dictionary translation.
    if "AI model unavailable, using dictionary:" in ai_result:
        return ai_result

    return f"{ai_result}\n\n---\n\nπŸ“– Dictionary version:\n{dict_result}"
def interpret_algospeak_simple(text):
    """Interpret AlgoSpeak by looking terms up in ``ALGOSPEAK_DICT``.

    Every dictionary term found in ``text`` is replaced by its meaning in
    square brackets.  Matching is case-insensitive, anchored on word
    boundaries (so a short term is not found inside an unrelated word) and
    tolerates a trailing "s" or "d", which is kept after the bracket.  All
    terms are handled in a single pass with longer terms taking priority,
    so an inserted "[meaning]" is never itself re-interpreted.

    Args:
        text: The AlgoSpeak input string.

    Returns:
        A message with the annotated text and the list of terms found (in
        order of first appearance), or a "no terms detected" notice echoing
        the input.
    """
    no_match = f"✨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\nπŸ’‘ Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."

    # Lower-cased term -> (term as listed in the dictionary, meaning).
    lookup = {}
    for term, meaning in ALGOSPEAK_DICT.items():
        if term:  # an empty term would match at every position
            lookup.setdefault(term.lower(), (term, meaning))
    if not lookup:
        return no_match

    alternatives = "|".join(
        re.escape(key) for key in sorted(lookup, key=len, reverse=True)
    )
    # Look-arounds instead of \b: terms may start or end with a non-word
    # character (e.g. "le$bian", "g@y").
    pattern = re.compile(rf"(?<!\w)({alternatives})([sd]?)(?!\w)", re.IGNORECASE)

    found = {}  # term -> meaning, in order of first appearance

    def _annotate(match):
        entry = lookup.get(match.group(1).lower())
        if entry is None:
            # Case-folding oddity: leave the text untouched rather than fail.
            return match.group(0)
        term, meaning = entry
        found.setdefault(term, meaning)
        return f"[{meaning}]{match.group(2)}"

    result = pattern.sub(_annotate, text)

    if not found:
        return no_match

    explanation = "\n".join(
        f"'{term}' β†’ {meaning}" for term, meaning in found.items()
    )
    return f"πŸ” Interpreted:\n\n{result}\n\nπŸ“– Terms found:\n{explanation}"
def interpret_algospeak_ai(text):
    """Interpret AlgoSpeak as plain English with the hosted language model.

    Builds a few-shot prompt, asks ``MODEL`` for a completion through
    ``client.text_generation`` and keeps only the first non-empty line of
    the answer: a plain completion model keeps writing further made-up
    lines after the one that answers the prompt.

    Args:
        text: The AlgoSpeak input string.

    Returns:
        A message with the model's interpretation.  If the request fails
        or the model returns nothing usable, a message wrapping the result
        of ``interpret_algospeak_simple(text)`` instead.
    """
    prompt = f"""Task: Translate AlgoSpeak (coded language) to plain English.
Examples:
"someone unalived" β†’ "someone died/killed themselves"
"talking about seggs" β†’ "talking about sex"
"the cornucopia continues" β†’ "the war continues"
"got my backshot for mascara" β†’ "got my vaccine for COVID"
Now translate: {text}
Plain English:"""
    try:
        response = client.text_generation(
            prompt,
            model=MODEL,
            max_new_tokens=100,
            temperature=0.5,
            do_sample=True,
            top_p=0.9,
            # Ask for the continuation only, never the echoed prompt.
            return_full_text=False,
        )
        candidates = [line.strip() for line in response.splitlines() if line.strip()]
    except Exception:
        # Deliberate best-effort boundary: any failure of the remote call
        # degrades to the dictionary interpretation instead of an error.
        candidates = []

    if not candidates:
        return f"⚠️ AI model unavailable, using dictionary:\n\n{interpret_algospeak_simple(text)}"

    result = candidates[0]
    # The prompt's examples are quoted, so the model often quotes its answer.
    if len(result) >= 2 and result.startswith('"') and result.endswith('"'):
        result = result[1:-1]
    return f"πŸ€– AI Interpretation:\n\n{result}"
def interpret_algospeak(text):
    """Interpret AlgoSpeak as plain language, combining dictionary and AI output.

    The dictionary interpretation runs first.  Only when it recognised at
    least one term is the AI interpretation requested as well, and both
    are returned.  If the AI step fell back to the dictionary, its message
    already contains the dictionary output, so that message is returned
    alone instead of printing the same interpretation twice.

    Args:
        text: The AlgoSpeak input; may be empty or ``None``.

    Returns:
        A user-facing message: a prompt to enter text, the dictionary
        "nothing detected" notice, or the interpretation(s).
    """
    if not text or not text.strip():
        return "⚠️ Please enter some AlgoSpeak text to interpret."

    dict_result = interpret_algospeak_simple(text)

    # No known terms: the dictionary notice is the whole answer.
    if "No AlgoSpeak terms detected" in dict_result:
        return dict_result

    ai_result = interpret_algospeak_ai(text)

    # The AI fallback message already embeds the dictionary interpretation.
    if "AI model unavailable, using dictionary:" in ai_result:
        return ai_result

    return f"{ai_result}\n\n---\n\n{dict_result}"
def search_dictionary(query):
    """Look up ``query`` in the AlgoSpeak dictionary.

    The query is lower-cased and stripped, then matched as a substring
    against both the coded term and its meaning (case-insensitively).

    Returns:
        Markdown with one "**term** -> meaning" entry per hit, separated by
        blank lines, or a "no terms found" message when nothing matches.
    """
    needle = query.lower().strip()
    hits = [
        f"**{term}** β†’ {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if needle in term.lower() or needle in meaning.lower()
    ]
    if hits:
        return "\n\n".join(hits)
    return "❌ No terms found. Try another search!"
# Gradio interface: three tabs (translate, interpret, dictionary search)
# followed by an "about" footer.
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a copy without indentation — confirm it matches the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
    # Page header / introduction.
    gr.Markdown(f"""
# πŸ—£οΈ AlgoSpeak AI Translator
**AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.
This tool can:
- πŸ“ Translate plain text β†’ AlgoSpeak (AI + dictionary hybrid)
- πŸ” Interpret AlgoSpeak β†’ plain language (AI-enhanced)
- πŸ“– Search through 60+ catalogued terms
πŸ€– **Powered by:** GPT-2 AI model + curated dictionary
πŸ’‘ **Hybrid approach:** AI for context + dictionary for accuracy
""")
    # Tab 1: plain text -> AlgoSpeak.
    with gr.Tab("🌐 Translate to AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_normal = gr.Textbox(
                    label="Plain Text",
                    placeholder="Type something...",
                    lines=3
                )
                btn_translate = gr.Button("Translate to AlgoSpeak", variant="primary")
            with gr.Column():
                output_algospeak = gr.Textbox(
                    label="AlgoSpeak Result",
                    lines=3
                )
        # Example inputs that fill the plain-text box.
        gr.Examples(
            examples=[
                ["Let's talk about mental health and teenage suicide"],
                ["The war in the region had many shootings and civilian deaths"],
                ["We need to discuss LGBTQ+ rights and responsible adult content"],
                ["COVID-19 caused millions of deaths in the pandemic"],
                ["Sex workers deserve rights and protection"]
            ],
            inputs=input_normal
        )
        # Button click runs translate_to_algospeak on the input box and
        # writes its return value to the result box.
        btn_translate.click(
            translate_to_algospeak,
            inputs=input_normal,
            outputs=output_algospeak
        )
    # Tab 2: AlgoSpeak -> plain language.
    with gr.Tab("πŸ” Interpret AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_algospeak = gr.Textbox(
                    label="AlgoSpeak Text",
                    placeholder="Paste AlgoSpeak text...",
                    lines=3
                )
                btn_interpret = gr.Button("Interpret", variant="primary")
            with gr.Column():
                output_normal = gr.Textbox(
                    label="Plain Language",
                    lines=3
                )
        # Example inputs that fill the AlgoSpeak box.
        gr.Examples(
            examples=[
                ["Someone tried to unalive themselves"],
                ["The seggs worker talked about their job"],
                ["There was a cornucopia with many pew pews"],
                ["Got my backshot today, feeling safer from the mascara"],
                ["The accountant shared spicy content on the corn site"]
            ],
            inputs=input_algospeak
        )
        # Button click runs interpret_algospeak on the input box and
        # writes its return value to the result box.
        btn_interpret.click(
            interpret_algospeak,
            inputs=input_algospeak,
            outputs=output_normal
        )
    # Tab 3: dictionary search.
    with gr.Tab("πŸ“– Dictionary Search"):
        gr.Markdown("""
### Explore the AlgoSpeak dictionary
Search by coded term OR by real meaning.
""")
        with gr.Row():
            with gr.Column():
                search_input = gr.Textbox(
                    label="Type your search",
                    placeholder="e.g. unalive, sex, weapon...",
                    lines=1
                )
                btn_search = gr.Button("πŸ” Search", variant="primary")
            with gr.Column():
                # Initial content shows a tip and the number of entries in
                # ALGOSPEAK_DICT; it is replaced by the search results.
                search_output = gr.Markdown(
                    label="Results",
                    value=f"πŸ’‘ Tip: Type any word to search!\n\n**Total terms in dictionary:** {len(ALGOSPEAK_DICT)}"
                )
        # Example queries that fill the search box.
        gr.Examples(
            examples=[
                ["unalive"],
                ["sex"],
                ["weapon"],
                ["COVID"],
                ["LGBTQ"]
            ],
            inputs=search_input
        )
        # Button click runs search_dictionary on the query and renders the
        # returned Markdown.
        btn_search.click(
            search_dictionary,
            inputs=search_input,
            outputs=search_output
        )
    # Footer shown below the tabs.
    gr.Markdown("""
---
### ℹ️ About AlgoSpeak
AlgoSpeak is a form of linguistic resistance against algorithmic censorship.
Learn more at [algospeak.net](https://www.algospeak.net/)
**Note**: This is an educational prototype developed during a workshop.
""")
# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()