File size: 11,463 Bytes
8febfa7
 
 
d3d6e15
72297a2
8febfa7
d3d6e15
 
 
8febfa7
 
 
 
c300f88
 
 
 
d3d6e15
72297a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c300f88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8febfa7
c300f88
72297a2
 
 
c300f88
 
 
 
 
 
 
 
 
 
8febfa7
72297a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8febfa7
c300f88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8febfa7
c300f88
72297a2
 
 
c300f88
 
 
 
 
 
 
 
 
8febfa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c300f88
 
8febfa7
 
 
72297a2
c300f88
 
d3d6e15
 
c300f88
 
8febfa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
import logging
import os
import re

import gradio as gr
from huggingface_hub import InferenceClient

from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT

# Initialize inference client with HF token (automatically provided in Spaces)
# The token is read from HF_TOKEN, falling back to HUGGINGFACE_TOKEN. When
# neither variable is set (or both are empty) hf_token is None and the client
# is constructed without an explicit token.
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
client = InferenceClient(token=hf_token)

# Complete AlgoSpeak dictionary for prompts
# NOTE(review): ALGOSPEAK_EXAMPLES is not referenced by any code visible in
# this file (the prompts below hard-code their examples) - confirm whether it
# is still needed.
ALGOSPEAK_EXAMPLES = get_algospeak_context()

# Model selection - using models that work reliably with HF Inference API
# GPT-2 is older but 100% reliable for text generation
# NOTE(review): the reliability claims above are the original author's and
# cannot be verified from this file.
MODEL = "gpt2-medium"  # Smaller, faster, always works
# NOTE(review): BACKUP_MODEL is defined but never used by the code visible
# here; a failed request falls back to the dictionary, not to this model.
BACKUP_MODEL = "distilgpt2"  # Even smaller fallback

# Plain-language term -> AlgoSpeak replacement used by the rule-based translator.
_PLAIN_TO_ALGOSPEAK = {
    # Death and violence
    "suicide": "sewerslide",
    "kill": "unalive",
    "killing": "unaliving",
    "killed": "unalived",
    "die": "unalive",
    "died": "unalived",
    "death": "unalive",
    "dead": "unalived",

    # Weapons and war
    "gun": "pew pew",
    "guns": "pew pews",
    "shooting": "pew pew",
    "shot": "pew pew",
    "weapon": "noodle",
    "weapons": "noodles",
    "war": "cornucopia",
    "bomb": "kaboom",

    # Adult content
    "sex": "seggs",
    "sexual": "seggs",
    "porn": "corn",
    "pornography": "corn",
    "lesbian": "le$bian",
    "gay": "g@y",

    # Health
    "COVID": "mascara",
    "COVID-19": "mascara",
    "coronavirus": "mascara",
    "vaccine": "backshot",
    "pandemic": "panini",

    # LGBTQ+
    "LGBTQ": "leg booty",
    "LGBTQ+": "leg booty",

    # Sexual violence
    "rape": "grape",
    "sexual assault": "SA",

    # Other
    "sex worker": "accountant",
    "stripper": "skripper",
    "marijuana": "lettuce",
    "weed": "lettuce",
}

# Case-insensitive lookup table keyed by the lower-cased plain term.
_PLAIN_LOOKUP = {plain.lower(): coded for plain, coded in _PLAIN_TO_ALGOSPEAK.items()}

# One alternation over every term, longest first so multi-word entries such as
# "sexual assault" win over "sexual" and "sex". The lookarounds stand in for
# \b because some terms end in a non-word character ("LGBTQ+"). The optional
# trailing "s" keeps simple plurals ("deaths", "shootings", "sex workers")
# translatable without listing each one.
_PLAIN_PATTERN = re.compile(
    r"(?<!\w)("
    + "|".join(re.escape(term) for term in sorted(_PLAIN_LOOKUP, key=len, reverse=True))
    + r")(s?)(?!\w)",
    re.IGNORECASE,
)


def translate_to_algospeak_simple(text):
    """Translate plain text to AlgoSpeak using the built-in replacement table.

    Matching is case-insensitive and restricted to whole words (optionally
    followed by a plural "s", which is carried over to the replacement), so
    words that merely contain a term ("award", "tweed", "begun") are left
    alone. All terms are replaced in a single pass over the input, so a
    replacement is never re-processed by another rule (e.g. "vaccine" becomes
    "backshot" and is not further rewritten by the "shot" rule).

    Args:
        text: The plain-language string to translate.

    Returns:
        A display string: the translated text behind a "Translated to
        AlgoSpeak" header, or a "No sensitive terms detected!" notice that
        echoes the original text when nothing was replaced.
    """
    def _encode(match):
        coded = _PLAIN_LOOKUP.get(match.group(1).lower())
        if coded is None:
            # Unusual case folding: leave the matched text untouched.
            return match.group(0)
        return coded + match.group(2)

    result = _PLAIN_PATTERN.sub(_encode, text)

    if result == text:
        return f"✨ No sensitive terms detected!\n\nOriginal: {text}\n\nπŸ’‘ Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."

    return f"πŸ”„ Translated to AlgoSpeak:\n\n{result}"

def translate_to_algospeak_ai(text):
    """Translate plain text to AlgoSpeak with the hosted text-generation model.

    Sends a few-shot prompt to the module-level ``client`` using ``MODEL``.
    A plain completion model may keep generating further lines after the
    answer, so only the first non-empty line of the completion is kept, with
    one pair of surrounding double quotes removed.

    If the request raises, or the model returns nothing usable, the failure is
    logged and the dictionary-based translation is returned instead
    (best-effort behaviour; this function does not raise for model errors).

    Args:
        text: The plain-language string to translate.

    Returns:
        A display string holding either the AI translation or, on failure,
        a notice followed by the dictionary translation.
    """
    prompt = f"""Task: Convert text to AlgoSpeak (coded language to avoid censorship).

Examples:
"someone died" β†’ "someone unalived"
"talking about sex" β†’ "talking about seggs"
"the war continues" β†’ "the cornucopia continues"
"COVID vaccine" β†’ "mascara backshot"

Now convert: {text}

AlgoSpeak:"""

    try:
        response = client.text_generation(
            prompt,
            model=MODEL,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
        )
    except Exception:
        # Deliberate best-effort fallback, but leave a trace so that problems
        # such as a missing token or an unavailable model can be diagnosed.
        logging.getLogger(__name__).warning(
            "Text generation with %s failed; using dictionary translation",
            MODEL,
            exc_info=True,
        )
        answer = ""
    else:
        # Keep only the first non-empty line of the completion.
        answer = next(
            (line.strip() for line in response.splitlines() if line.strip()),
            "",
        )
        # Remove one pair of surrounding quotes, mirroring the prompt examples.
        if len(answer) >= 2 and answer.startswith('"') and answer.endswith('"'):
            answer = answer[1:-1].strip()

    if not answer:
        # Fallback to dictionary-based
        return f"⚠️ AI model unavailable, using dictionary:\n\n{translate_to_algospeak_simple(text)}"
    return f"πŸ€– AI Translation:\n\n{answer}"

def translate_to_algospeak(text):
    """Translate plain text to AlgoSpeak, combining dictionary and AI output.

    The dictionary translation always runs first. Only when it replaced at
    least one term is the AI translation requested as well; the two results
    are then shown together. If the AI helper itself fell back to the
    dictionary, its message already contains the dictionary translation, so
    it is returned alone instead of printing the same translation twice.

    Args:
        text: The plain-language string entered by the user.

    Returns:
        A display string: a prompt to enter text when the input is empty or
        whitespace, the "no terms detected" notice, or the combined result.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to translate."

    dict_result = translate_to_algospeak_simple(text)

    # Only the first line is a status header; the rest echoes user text and
    # must not be searched for the marker.
    if "No sensitive terms detected" in dict_result.partition("\n")[0]:
        return dict_result

    ai_result = translate_to_algospeak_ai(text)

    # The AI helper reports a failed request with this header and appends the
    # dictionary translation itself.
    if "AI model unavailable" in ai_result.partition("\n")[0]:
        return ai_result

    return f"{ai_result}\n\n---\n\nπŸ“– Dictionary version:\n{dict_result}"

def interpret_algospeak_simple(text, *, dictionary=None):
    """Replace AlgoSpeak terms in ``text`` with their bracketed meanings.

    Matching is case-insensitive and restricted to whole terms (optionally
    followed by a plural "s", which is kept after the bracket), so a short
    term is not matched inside an ordinary word. All terms are matched in one
    pass over the original text, longest term first, which means a longer
    term is never broken up by a shorter one it contains and inserted
    meanings are never themselves re-interpreted. Inflected forms other than
    a plural "s" are only recognised if they are dictionary entries.

    Args:
        text: The AlgoSpeak string to interpret.
        dictionary: Optional mapping of AlgoSpeak term -> meaning (both
            strings). Defaults to the module-level ``ALGOSPEAK_DICT``.

    Returns:
        A display string: the interpreted text plus the list of terms found
        (in dictionary order), or a "No AlgoSpeak terms detected!" notice
        that echoes the original text.
    """
    entries = ALGOSPEAK_DICT if dictionary is None else dictionary

    # Lower-cased term -> (term as spelled in the dictionary, meaning).
    # The first entry wins when two terms differ only by case.
    lookup = {}
    for term, meaning in entries.items():
        if term:  # an empty term would match at every position
            lookup.setdefault(term.lower(), (term, meaning))

    matched = set()
    result = text

    if lookup:
        # Lookarounds stand in for \b because terms may start or end with
        # non-word characters (e.g. "le$bian", "g@y").
        pattern = re.compile(
            r"(?<!\w)("
            + "|".join(re.escape(key) for key in sorted(lookup, key=len, reverse=True))
            + r")(s?)(?!\w)",
            re.IGNORECASE,
        )

        def _decode(match):
            key = match.group(1).lower()
            entry = lookup.get(key)
            if entry is None:
                # Unusual case folding: leave the matched text untouched.
                return match.group(0)
            matched.add(key)
            return f"[{entry[1]}]{match.group(2)}"

        result = pattern.sub(_decode, text)

    found_terms = [
        f"'{term}' β†’ {meaning}"
        for key, (term, meaning) in lookup.items()
        if key in matched
    ]

    if not found_terms:
        return f"✨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\nπŸ’‘ Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."

    explanation = "\n".join(found_terms)
    return f"πŸ” Interpreted:\n\n{result}\n\nπŸ“– Terms found:\n{explanation}"

def interpret_algospeak_ai(text):
    """Interpret AlgoSpeak as plain English with the hosted text-generation model.

    Sends a few-shot prompt to the module-level ``client`` using ``MODEL``.
    A plain completion model may keep generating further lines after the
    answer, so only the first non-empty line of the completion is kept, with
    one pair of surrounding double quotes removed.

    If the request raises, or the model returns nothing usable, the failure is
    logged and the dictionary-based interpretation is returned instead
    (best-effort behaviour; this function does not raise for model errors).

    Args:
        text: The AlgoSpeak string to interpret.

    Returns:
        A display string holding either the AI interpretation or, on failure,
        a notice followed by the dictionary interpretation.
    """
    prompt = f"""Task: Translate AlgoSpeak (coded language) to plain English.

Examples:
"someone unalived" β†’ "someone died/killed themselves"
"talking about seggs" β†’ "talking about sex"
"the cornucopia continues" β†’ "the war continues"
"got my backshot for mascara" β†’ "got my vaccine for COVID"

Now translate: {text}

Plain English:"""

    try:
        response = client.text_generation(
            prompt,
            model=MODEL,
            max_new_tokens=100,
            temperature=0.5,
            do_sample=True,
            top_p=0.9,
        )
    except Exception:
        # Deliberate best-effort fallback, but leave a trace so that problems
        # such as a missing token or an unavailable model can be diagnosed.
        logging.getLogger(__name__).warning(
            "Text generation with %s failed; using dictionary interpretation",
            MODEL,
            exc_info=True,
        )
        answer = ""
    else:
        # Keep only the first non-empty line of the completion.
        answer = next(
            (line.strip() for line in response.splitlines() if line.strip()),
            "",
        )
        # Remove one pair of surrounding quotes, mirroring the prompt examples.
        if len(answer) >= 2 and answer.startswith('"') and answer.endswith('"'):
            answer = answer[1:-1].strip()

    if not answer:
        return f"⚠️ AI model unavailable, using dictionary:\n\n{interpret_algospeak_simple(text)}"
    return f"πŸ€– AI Interpretation:\n\n{answer}"

def interpret_algospeak(text):
    """Interpret AlgoSpeak as plain language, combining dictionary and AI output.

    The dictionary interpretation always runs first. Only when it recognised
    at least one term is the AI interpretation requested as well; the two
    results are then shown together. If the AI helper itself fell back to the
    dictionary, its message already contains the dictionary interpretation,
    so it is returned alone instead of printing the same result twice.

    Args:
        text: The AlgoSpeak string entered by the user.

    Returns:
        A display string: a prompt to enter text when the input is empty or
        whitespace, the "no terms detected" notice, or the combined result.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some AlgoSpeak text to interpret."

    # Get dictionary result
    dict_result = interpret_algospeak_simple(text)

    # Only the first line is a status header; the rest echoes user text and
    # must not be searched for the marker.
    if "No AlgoSpeak terms detected" in dict_result.partition("\n")[0]:
        return dict_result

    ai_result = interpret_algospeak_ai(text)

    # The AI helper reports a failed request with this header and appends the
    # dictionary interpretation itself.
    if "AI model unavailable" in ai_result.partition("\n")[0]:
        return ai_result

    return f"{ai_result}\n\n---\n\n{dict_result}"

def search_dictionary(query, *, dictionary=None):
    """Search the AlgoSpeak dictionary by coded term or by meaning.

    The query is trimmed and compared case-insensitively as a substring of
    both the term and its meaning. A missing or blank query asks the user for
    input, in line with the translate/interpret entry points, instead of
    matching every entry.

    Args:
        query: The search text; ``None`` is treated like an empty string.
        dictionary: Optional mapping of AlgoSpeak term -> meaning (both
            strings). Defaults to the module-level ``ALGOSPEAK_DICT``.

    Returns:
        A Markdown string with one bolded entry per match separated by blank
        lines, a "no terms found" message, or a prompt to enter a search term.
    """
    needle = (query or "").lower().strip()
    if not needle:
        return "⚠️ Please enter a search term."

    entries = ALGOSPEAK_DICT if dictionary is None else dictionary
    results = [
        f"**{term}** β†’ {meaning}"
        for term, meaning in entries.items()
        if needle in term.lower() or needle in meaning.lower()
    ]

    if not results:
        return "❌ No terms found. Try another search!"

    return "\n\n".join(results)

# Gradio interface
# Builds the three-tab UI (translate, interpret, dictionary search). Components
# are created in statement order inside the nested context managers, so the
# order of the statements below determines the page layout.
with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
    # Page header / introduction.
    gr.Markdown(f"""
    # πŸ—£οΈ AlgoSpeak AI Translator
    
    **AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.
    
    This tool can:
    - πŸ“ Translate plain text β†’ AlgoSpeak (AI + dictionary hybrid)
    - πŸ” Interpret AlgoSpeak β†’ plain language (AI-enhanced)
    - πŸ“– Search through 60+ catalogued terms
    
    πŸ€– **Powered by:** GPT-2 AI model + curated dictionary
    πŸ’‘ **Hybrid approach:** AI for context + dictionary for accuracy
    """)
    
    # Tab 1: plain text -> AlgoSpeak, handled by translate_to_algospeak.
    with gr.Tab("🌐 Translate to AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_normal = gr.Textbox(
                    label="Plain Text",
                    placeholder="Type something...",
                    lines=3
                )
                btn_translate = gr.Button("Translate to AlgoSpeak", variant="primary")
            with gr.Column():
                output_algospeak = gr.Textbox(
                    label="AlgoSpeak Result",
                    lines=3
                )
        
        # Sample sentences offered as inputs for the textbox above.
        gr.Examples(
            examples=[
                ["Let's talk about mental health and teenage suicide"],
                ["The war in the region had many shootings and civilian deaths"],
                ["We need to discuss LGBTQ+ rights and responsible adult content"],
                ["COVID-19 caused millions of deaths in the pandemic"],
                ["Sex workers deserve rights and protection"]
            ],
            inputs=input_normal
        )
        
        btn_translate.click(
            translate_to_algospeak,
            inputs=input_normal,
            outputs=output_algospeak
        )
    
    # Tab 2: AlgoSpeak -> plain language, handled by interpret_algospeak.
    with gr.Tab("πŸ” Interpret AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_algospeak = gr.Textbox(
                    label="AlgoSpeak Text",
                    placeholder="Paste AlgoSpeak text...",
                    lines=3
                )
                btn_interpret = gr.Button("Interpret", variant="primary")
            with gr.Column():
                output_normal = gr.Textbox(
                    label="Plain Language",
                    lines=3
                )
        
        # Sample AlgoSpeak sentences offered as inputs for the textbox above.
        gr.Examples(
            examples=[
                ["Someone tried to unalive themselves"],
                ["The seggs worker talked about their job"],
                ["There was a cornucopia with many pew pews"],
                ["Got my backshot today, feeling safer from the mascara"],
                ["The accountant shared spicy content on the corn site"]
            ],
            inputs=input_algospeak
        )
        
        btn_interpret.click(
            interpret_algospeak,
            inputs=input_algospeak,
            outputs=output_normal
        )
    
    # Tab 3: dictionary lookup, handled by search_dictionary; results are
    # rendered as Markdown.
    with gr.Tab("πŸ“– Dictionary Search"):
        gr.Markdown("""
        ### Explore the AlgoSpeak dictionary
        Search by coded term OR by real meaning.
        """)
        
        with gr.Row():
            with gr.Column():
                search_input = gr.Textbox(
                    label="Type your search",
                    placeholder="e.g. unalive, sex, weapon...",
                    lines=1
                )
                btn_search = gr.Button("πŸ” Search", variant="primary")
            with gr.Column():
                # Initial content shows the number of entries in ALGOSPEAK_DICT.
                search_output = gr.Markdown(
                    label="Results",
                    value=f"πŸ’‘ Tip: Type any word to search!\n\n**Total terms in dictionary:** {len(ALGOSPEAK_DICT)}"
                )
        
        # Sample queries offered as inputs for the search box.
        gr.Examples(
            examples=[
                ["unalive"],
                ["sex"],
                ["weapon"],
                ["COVID"],
                ["LGBTQ"]
            ],
            inputs=search_input
        )
        
        btn_search.click(
            search_dictionary,
            inputs=search_input,
            outputs=search_output
        )
    
    # Page footer.
    gr.Markdown("""
    ---
    ### ℹ️ About AlgoSpeak
    AlgoSpeak is a form of linguistic resistance against algorithmic censorship.
    Learn more at [algospeak.net](https://www.algospeak.net/)
    
    **Note**: This is an educational prototype developed during a workshop.
    """)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()