import streamlit as st
import torch
import torch.nn as nn
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import string
from collections import Counter
import numpy as np
from typing import List
import time

# Loading the pre-trained model
class BahdanauAttention(nn.Module):
    """Additive (Bahdanau) attention: score(q, k) = Wv^T * tanh(Wa*q + Wk*k)."""

    def __init__(self, hidden_size: int):
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Wk = nn.Linear(hidden_size, hidden_size)
        self.Wv = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        query = query.unsqueeze(1)  # (batch_size, 1, hidden_size)
        scores = self.Wv(torch.tanh(self.Wa(query) + self.Wk(keys))).squeeze(2)  # (batch_size, seq_len)
        attention_weights = torch.softmax(scores, dim=1)  # (batch_size, seq_len)
        context = torch.bmm(attention_weights.unsqueeze(1), keys).squeeze(1)  # (batch_size, hidden_size)
        return context, attention_weights


class LSTM_Word2Vec_Attention(nn.Module):
    def __init__(self, hidden_size: int, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.attn = BahdanauAttention(hidden_size)
        self.clf = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.Dropout(),
            nn.Tanh(),
            nn.Linear(128, 3)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        embedded = self.embedding(x)  # (batch_size, seq_len, embedding_dim)
        output, (hidden, _) = self.lstm(embedded)  # output: (batch_size, seq_len, hidden_size)
        context, attention_weights = self.attn(hidden[-1], output)  # attend over all time steps
        output = self.clf(context)  # (batch_size, 3)
        output = self.sigmoid(output)  # per-class sigmoid scores over the 3 labels
        return output, attention_weights
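

# Minimal usage sketch (not part of the original app): the hyperparameters and
# sizes below are illustrative assumptions only; the actual Space presumably
# restores vocab_size, embedding_dim, hidden_size, and trained weights from its
# saved checkpoint. Shown here just to make the expected tensor shapes explicit.
if __name__ == "__main__":
    model = LSTM_Word2Vec_Attention(hidden_size=64, vocab_size=10000, embedding_dim=100)
    dummy_ids = torch.randint(0, 10000, (2, 20))  # (batch_size=2, seq_len=20) token indices
    probs, attn = model(dummy_ids)
    print(probs.shape)  # torch.Size([2, 3])  - per-class sigmoid scores
    print(attn.shape)   # torch.Size([2, 20]) - one attention weight per input token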