---
license: apache-2.0
datasets:
- anon8231489123/Omegle_logs_dataset
language:
- en
pipeline_tag: text-generation
---

# OmegLSTM

OmegLSTM is an LSTM-based, character-level text generation network trained on chat logs from the now-defunct random chat website "Omegle".

## WARNING!!!!!!

This model **will** produce **highly sexual** content. Do not use it if you are under 18!!!!

## Prompt format

```
Interests: interests separated by spaces, or leave empty
You: ...
Stranger: ...
etc
```
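For example, a prompt that seeds a short exchange could look like the block below. The interests and messages are placeholders for illustration, not samples from the dataset; ending the prompt with `Stranger: ` (as in the inference example further down) asks the model to continue as the stranger.

```
Interests: music gaming
You: hi
Stranger: hey
You: how are you?
Stranger: 
```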
## Architecture

This model uses an (as far as I am aware) novel architecture that stacks LSTMs first in parallel, then in sequence, similar to transformer blocks. It has 4,987,763 parameters.

## Inference code

Save the following as a Python file in the same folder as the `.pt` file and run it. Change `device = torch.device("cuda:0")` to `device = torch.device("cpu")` for CPU inference.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from einops import rearrange

device = torch.device("cuda:0")


class MHL(nn.Module):
    """Multi-head LSTM: several LSTMs run in parallel on the same input and their outputs are summed."""

    def __init__(self, embed_dim, heads):
        super(MHL, self).__init__()
        self.lstms = nn.ModuleList([nn.LSTM(embed_dim, embed_dim, batch_first=True) for i in range(heads)])
        self.ln = nn.LayerNorm(embed_dim)

    def forward(self, x):
        combo = [lstm(x)[0] for lstm in self.lstms]
        combo = torch.sum(torch.stack(combo), dim=0)
        return self.ln(combo)


class SLSTM_block(nn.Module):
    """Transformer-style block: parallel LSTM heads followed by a pointwise convolutional feed-forward, each with a residual connection."""

    def __init__(self, embed_dim, heads):
        super(SLSTM_block, self).__init__()
        self.lstms = MHL(embed_dim, heads)
        self.c1 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.c2 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.ac = nn.GELU()
        self.norm = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # LSTM sub-layer with residual connection
        skip = x
        x = self.norm(x)
        x = self.lstms(x)
        x = x + skip
        # Pointwise (kernel size 1) convolutional feed-forward with residual connection
        skip = x
        x = self.norm2(x)
        x = rearrange(x, "b l c -> b c l")
        x = self.c1(x)
        x = self.ac(x)
        x = self.c2(x)
        x = self.ac(x)
        x = rearrange(x, "b c l -> b l c")
        x = x + skip
        return x


class CharGenModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, layers, heads):
        super(CharGenModel, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.blocks = nn.ModuleList([SLSTM_block(embed_dim, heads) for i in range(layers)])
        # Output layer to predict next character
        self.fc = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        # x: (batch_size, sequence_length)
        # Get embeddings
        x = self.embedding(x)  # (batch_size, sequence_length, embed_dim)
        for block in self.blocks:
            x = block(x)
        # Output layer
        x = self.fc(x)  # (batch_size, sequence_length, vocab_size)
        return x


# Helper function to generate text
def generate_text(model, char_to_idx, idx_to_char, start_text, gen_length=100, temperature=1.0, top_k=10):
    model.eval()
    input_seq = torch.tensor([char_to_idx[char] for char in start_text], dtype=torch.long).unsqueeze(0).to(device)
    generated_text = start_text
    for _ in tqdm(range(gen_length)):
        with torch.no_grad():
            # Get the output for the last 128 tokens
            output = model(input_seq[:, -128:])
            # Get the logits for the last timestep
            logits = output[:, -1, :]
            # Apply temperature scaling
            logits = logits / temperature
            # Apply top-k filtering
            if top_k > 0:
                values, indices = torch.topk(logits, top_k)
                logits_top_k = torch.zeros_like(logits).fill_(-float('Inf'))
                logits_top_k.scatter_(1, indices, values)
                logits = logits_top_k
            # Apply softmax to get probabilities
            probabilities = F.softmax(logits, dim=-1)
            # Sample the next character index from the probability distribution
            next_char_idx = torch.multinomial(probabilities, num_samples=1).item()
            next_char = idx_to_char[next_char_idx]
            # Append the generated character to the result
            generated_text += next_char
            # Update the input sequence
            input_seq = torch.cat((input_seq, torch.tensor([[next_char_idx]]).to(device)), dim=1)
    model.train()
    return generated_text


chars = ['\n', ' ', '!', '"', '#', '$', '&', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '\x86', '\x8d', '\x91', '\x9f', '©', 'ª', '\xad', '´', 'µ', '¸', '½', '¿', 'æ', 'ï', 'ð', 'č', 'ļ', 'Ś', 'Ÿ', 'Ż', 'জ', 'ত', 'ন', 'া', 'ে', 'ো', '‡', '√', '┐', '�']
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(chars)
embed_dim = 128

model = CharGenModel(vocab_size, embed_dim, 6, 6).to(device)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(f"Model has {round(params/1000000,3)}M parameters")
print(f"Model has {params} parameters")

# map_location lets the checkpoint load for CPU inference as well as GPU
model.load_state_dict(torch.load("OmegLSTM.pt", map_location=device))

print(generate_text(model, char_to_idx, idx_to_char, """Interests: \nYou: Hello\nStranger: """, gen_length=128, temperature=0.7, top_k=4))
```

## Training

The model was trained for 10 epochs on the entire dataset.

Loss Graph:

![image/png](https://cdn-uploads.huggingface.co/production/uploads/6316fb937b0ee0136e5f1220/7_oHKhzm4aS71tVUgD3B2.png)
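The training script is not shown in this card. As a rough illustration only, below is a minimal, hypothetical sketch of a character-level training loop (teacher-forced next-character prediction with cross-entropy). It reuses the classes and the `chars` / `char_to_idx` / `vocab_size` / `embed_dim` definitions from the inference code above; the dataset file name `omegle_logs.txt`, the chunking scheme, the batch size, and the optimizer settings are assumptions, not the author's actual setup. Only the model definition and the 10 epochs come from this card.

```python
# Hypothetical training sketch -- NOT the author's original training script.
# Assumes the model classes and vocabulary variables from the inference code above,
# plus a plain-text dump of the dataset at "omegle_logs.txt" (assumed file name).
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda:0")
seq_len = 128      # matches the 128-character context window used at inference time
batch_size = 64    # assumption
epochs = 10        # stated in the model card

# Encode the corpus as one long sequence of character indices
with open("omegle_logs.txt", encoding="utf-8") as f:
    text = f.read()
data = torch.tensor([char_to_idx[c] for c in text if c in char_to_idx], dtype=torch.long)

# Build (input, target) chunks shifted by one character for next-character prediction
n_chunks = (len(data) - 1) // seq_len
inputs = data[:n_chunks * seq_len].view(n_chunks, seq_len)
targets = data[1:n_chunks * seq_len + 1].view(n_chunks, seq_len)
loader = DataLoader(TensorDataset(inputs, targets), batch_size=batch_size, shuffle=True)

model = CharGenModel(vocab_size, embed_dim, 6, 6).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # optimizer and lr are assumptions
criterion = nn.CrossEntropyLoss()

for epoch in range(epochs):
    total = 0.0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)  # (batch, seq_len, vocab_size)
        loss = criterion(logits.reshape(-1, vocab_size), y.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total += loss.item()
    print(f"epoch {epoch + 1}: mean loss {total / len(loader):.4f}")

torch.save(model.state_dict(), "OmegLSTM.pt")
```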