---
license: apache-2.0
datasets:
- anon8231489123/Omegle_logs_dataset
language:
- en
pipeline_tag: text-generation
---

# OmegLSTM

OmegLSTM is an LSTM-based, character-level text generation network trained on chat logs from the now-defunct random chat website "Omegle".

## WARNING!!!!!!

This model **will** produce **highly sexual** content. Do not use it if you are under 18!!!!

## Prompt format

```
Interests: interests separated by spaces, or leave empty
You: ...
Stranger: ...
etc
```
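For example, a prompt that seeds a short exchange could look like the block below. The interests and messages are placeholders for illustration, not samples from the dataset; ending the prompt with `Stranger: ` (as in the inference example further down) asks the model to continue as the stranger.

```
Interests: music gaming
You: hi
Stranger: hey
You: how are you?
Stranger: 
```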
## Architecture

This model uses an (as far as I am aware) novel architecture that stacks LSTMs first in parallel, then in sequence, similar to transformer blocks. It has 4,987,763 parameters.

## Inference code

Save the following as a Python file in the same folder as the `.pt` file and run it. Change `device = torch.device("cuda:0")` to `device = torch.device("cpu")` for CPU inference.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from einops import rearrange

device = torch.device("cuda:0")


class MHL(nn.Module):
    """Multi-head LSTM: several LSTMs run in parallel on the same input and their outputs are summed."""

    def __init__(self, embed_dim, heads):
        super(MHL, self).__init__()
        self.lstms = nn.ModuleList([nn.LSTM(embed_dim, embed_dim, batch_first=True) for i in range(heads)])
        self.ln = nn.LayerNorm(embed_dim)

    def forward(self, x):
        combo = [lstm(x)[0] for lstm in self.lstms]
        combo = torch.sum(torch.stack(combo), dim=0)
        return self.ln(combo)


class SLSTM_block(nn.Module):
    """Transformer-style block: parallel LSTM heads followed by a pointwise convolutional feed-forward, each with a residual connection."""

    def __init__(self, embed_dim, heads):
        super(SLSTM_block, self).__init__()
        self.lstms = MHL(embed_dim, heads)
        self.c1 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.c2 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.ac = nn.GELU()
        self.norm = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # LSTM sub-layer with residual connection
        skip = x
        x = self.norm(x)
        x = self.lstms(x)
        x = x + skip
        # Pointwise (kernel size 1) convolutional feed-forward with residual connection
        skip = x
        x = self.norm2(x)
        x = rearrange(x, "b l c -> b c l")
        x = self.c1(x)
        x = self.ac(x)
        x = self.c2(x)
        x = self.ac(x)
        x = rearrange(x, "b c l -> b l c")
        x = x + skip
        return x


class CharGenModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, layers, heads):
        super(CharGenModel, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.blocks = nn.ModuleList([SLSTM_block(embed_dim, heads) for i in range(layers)])
        # Output layer to predict next character
        self.fc = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        # x: (batch_size, sequence_length)
        # Get embeddings
        x = self.embedding(x)  # (batch_size, sequence_length, embed_dim)
        for block in self.blocks:
            x = block(x)
        # Output layer
        x = self.fc(x)  # (batch_size, sequence_length, vocab_size)
        return x


# Helper function to generate text
def generate_text(model, char_to_idx, idx_to_char, start_text, gen_length=100, temperature=1.0, top_k=10):
    model.eval()
    input_seq = torch.tensor([char_to_idx[char] for char in start_text], dtype=torch.long).unsqueeze(0).to(device)
    generated_text = start_text
    for _ in tqdm(range(gen_length)):
        with torch.no_grad():
            # Get the output for the last 128 tokens
            output = model(input_seq[:, -128:])
            # Get the logits for the last timestep
            logits = output[:, -1, :]
            # Apply temperature scaling
            logits = logits / temperature
            # Apply top-k filtering
            if top_k > 0:
                values, indices = torch.topk(logits, top_k)
                logits_top_k = torch.zeros_like(logits).fill_(-float('Inf'))
                logits_top_k.scatter_(1, indices, values)
                logits = logits_top_k
            # Apply softmax to get probabilities
            probabilities = F.softmax(logits, dim=-1)
            # Sample the next character index from the probability distribution
            next_char_idx = torch.multinomial(probabilities, num_samples=1).item()
            next_char = idx_to_char[next_char_idx]
            # Append the generated character to the result
            generated_text += next_char
            # Update the input sequence
            input_seq = torch.cat((input_seq, torch.tensor([[next_char_idx]]).to(device)), dim=1)
    model.train()
    return generated_text


chars = ['\n', ' ', '!', '"', '#', '$', '&', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '\x86', '\x8d', '\x91', '\x9f', '©', 'ª', '\xad', '´', 'µ', '¸', '½', '¿', 'æ', 'ï', 'ð', 'č', 'ļ', 'Ś', 'Ÿ', 'Ż', 'জ', 'ত', 'ন', 'া', 'ে', 'ো', '‡', '√', '┐', '�']
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(chars)
embed_dim = 128

model = CharGenModel(vocab_size, embed_dim, 6, 6).to(device)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(f"Model has {round(params/1000000,3)}M parameters")
print(f"Model has {params} parameters")

# map_location lets the checkpoint load for CPU inference as well as GPU
model.load_state_dict(torch.load("OmegLSTM.pt", map_location=device))

print(generate_text(model, char_to_idx, idx_to_char, """Interests: \nYou: Hello\nStranger: """, gen_length=128, temperature=0.7, top_k=4))
```

## Training

The model was trained for 10 epochs on the entire dataset.

Loss Graph:

![image/png](https://cdn-uploads.huggingface.co/production/uploads/6316fb937b0ee0136e5f1220/7_oHKhzm4aS71tVUgD3B2.png)
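The training script is not shown in this card. As a rough illustration only, below is a minimal, hypothetical sketch of a character-level training loop (teacher-forced next-character prediction with cross-entropy). It reuses the classes and the `chars` / `char_to_idx` / `vocab_size` / `embed_dim` definitions from the inference code above; the dataset file name `omegle_logs.txt`, the chunking scheme, the batch size, and the optimizer settings are assumptions, not the author's actual setup. Only the model definition and the 10 epochs come from this card.

```python
# Hypothetical training sketch -- NOT the author's original training script.
# Assumes the model classes and vocabulary variables from the inference code above,
# plus a plain-text dump of the dataset at "omegle_logs.txt" (assumed file name).
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda:0")
seq_len = 128      # matches the 128-character context window used at inference time
batch_size = 64    # assumption
epochs = 10        # stated in the model card

# Encode the corpus as one long sequence of character indices
with open("omegle_logs.txt", encoding="utf-8") as f:
    text = f.read()
data = torch.tensor([char_to_idx[c] for c in text if c in char_to_idx], dtype=torch.long)

# Build (input, target) chunks shifted by one character for next-character prediction
n_chunks = (len(data) - 1) // seq_len
inputs = data[:n_chunks * seq_len].view(n_chunks, seq_len)
targets = data[1:n_chunks * seq_len + 1].view(n_chunks, seq_len)
loader = DataLoader(TensorDataset(inputs, targets), batch_size=batch_size, shuffle=True)

model = CharGenModel(vocab_size, embed_dim, 6, 6).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # optimizer and lr are assumptions
criterion = nn.CrossEntropyLoss()

for epoch in range(epochs):
    total = 0.0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)  # (batch, seq_len, vocab_size)
        loss = criterion(logits.reshape(-1, vocab_size), y.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total += loss.item()
    print(f"epoch {epoch + 1}: mean loss {total / len(loader):.4f}")

torch.save(model.state_dict(), "OmegLSTM.pt")
```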