Amal17 committed
Commit ec1ed73 · 1 Parent(s): 66db869

add bert-lstm

app.py CHANGED
@@ -1,28 +1,59 @@
import gradio as gr
import torch
from bert_gru_classifier import BERTBiGRUClassifier
+from bert_lstm_classifier import BERTBiLSTMClassifier
from transformers import AutoTokenizer

CLASS_MAP = {0: "Negative", 1: "Neutral", 2: "Positive" }
-model = BERTBiGRUClassifier.from_pretrained("Amal17/NusaBERT-BiGRU-NusaX-ace")
+
+# Load the tokenizer (the same tokenizer is used for every model)
tokenizer = AutoTokenizer.from_pretrained("LazarusNLP/NusaBERT-large")
-model.eval()

-def run(input):
-    text_tokenized = tokenizer(
-        input,
+# Load models
+bigru_model = BERTBiGRUClassifier.from_pretrained("Amal17/NusaBERT-concate-BiGRU-NusaX-ace")
+bigru_model.eval()
+
+bilstm_model = BERTBiLSTMClassifier.from_pretrained("Amal17/NusaBERT-concate-BiLSTM-NusaX-ace")
+bilstm_model.eval()
+
+# Inference helper
+def predict_with_model(model, text):
+    inputs = tokenizer(
+        text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt"
    )
-
-    outputs = model(**text_tokenized)
-    logits = outputs['logits']
-    probs = torch.nn.functional.softmax(logits, dim=1)
-    print(probs)
-    preds = torch.argmax(probs, dim=1).tolist()
-    return f"Prediction: Class {preds[0]} ({CLASS_MAP[preds[0]]}) With Probs : {probs[0][preds[0]]}"
+    with torch.no_grad():
+        outputs = model(**inputs)
+    logits = outputs["logits"]
+    probs = torch.softmax(logits, dim=1)
+    pred = torch.argmax(probs, dim=1).item()
+    confidence = probs[0][pred].item()
+    return pred, confidence
+
+# Gradio interface function
+def compare_models(text):
+    pred_a, conf_a = predict_with_model(bigru_model, text)
+    pred_b, conf_b = predict_with_model(bilstm_model, text)
+
+    return (
+        f"BiGRU → Class: {pred_a}", f"Confidence: {conf_a:.4f}",
+        f"BiLSTM → Class: {pred_b}", f"Confidence: {conf_b:.4f}"
+    )

-demo = gr.Interface(fn=run, inputs="text", outputs="text")
-demo.launch()
+# Build the Gradio UI
+interface = gr.Interface(
+    fn=compare_models,
+    inputs=gr.Textbox(label="Input Text"),
+    outputs=[
+        gr.Textbox(label="BiGRU Prediction"),
+        gr.Textbox(label="BiGRU Confidence"),
+        gr.Textbox(label="BiLSTM Prediction"),
+        gr.Textbox(label="BiLSTM Confidence"),
+    ],
+    title="Model Comparison: BiGRU vs BiLSTM"
+)
+
+interface.launch()
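For reference (not part of this commit): a minimal sketch of exercising the new BiLSTM checkpoint outside the Gradio UI, mirroring what predict_with_model does above. The input sentence is a placeholder, and the snippet assumes the Hub checkpoint and tokenizer named in app.py can be downloaded.

# Illustrative smoke test only; mirrors app.py's inference path for the BiLSTM model.
import torch
from transformers import AutoTokenizer
from bert_lstm_classifier import BERTBiLSTMClassifier

CLASS_MAP = {0: "Negative", 1: "Neutral", 2: "Positive"}
tokenizer = AutoTokenizer.from_pretrained("LazarusNLP/NusaBERT-large")
model = BERTBiLSTMClassifier.from_pretrained("Amal17/NusaBERT-concate-BiLSTM-NusaX-ace")
model.eval()

inputs = tokenizer("your sentence here", padding="max_length", truncation=True,
                   max_length=128, return_tensors="pt")
with torch.no_grad():
    probs = torch.softmax(model(**inputs)["logits"], dim=1)
pred = int(probs.argmax(dim=1))
confidence = probs[0][pred].item()
print(f"{CLASS_MAP[pred]} ({confidence:.4f})")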
 
bert_lstm_classifier/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .config import ConfigHybridBiLSTMModel
+from .model import BERTBiLSTMClassifier
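Not part of the commit, but worth noting: since the package re-exports both the config and the model class, the custom architecture could also be registered with transformers' Auto classes so that generic loading code resolves it. A hedged sketch using the public registration API (whether the Space actually needs this is an assumption):

# Illustrative only: make the Auto* factories aware of the custom classes.
from transformers import AutoConfig, AutoModelForSequenceClassification
from bert_lstm_classifier import ConfigHybridBiLSTMModel, BERTBiLSTMClassifier

AutoConfig.register("bert-bilstm", ConfigHybridBiLSTMModel)
AutoModelForSequenceClassification.register(ConfigHybridBiLSTMModel, BERTBiLSTMClassifier)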
bert_lstm_classifier/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (287 Bytes)
 
bert_lstm_classifier/__pycache__/config.cpython-310.pyc ADDED
Binary file (1.12 kB)
 
bert_lstm_classifier/__pycache__/model.cpython-310.pyc ADDED
Binary file (5.59 kB)
 
bert_lstm_classifier/config.py ADDED
@@ -0,0 +1,32 @@
+from transformers import PretrainedConfig
+
+class ConfigHybridBiLSTMModel(PretrainedConfig):
+    model_type = "bert-bilstm"
+
+    def __init__(self,
+                 bert_model_name="bert-base-uncased",
+                 tokenizer_name="bert-base-uncased",
+                 hidden_dim=128,
+                 num_classes=2,
+                 lstm_layers=1,
+                 bidirectional=True,
+                 dropout=0.3,
+                 concat_layers=0,
+                 pooling="last",
+                 freeze_bert=False,
+                 freeze_n_layers=0,        # number of BERT layers to freeze
+                 freeze_from_start=False,  # freeze from the start or from the end
+                 **kwargs):
+        super().__init__(**kwargs)
+        self.bert_model_name = bert_model_name
+        self.tokenizer_name = tokenizer_name
+        self.hidden_dim = hidden_dim
+        self.num_classes = num_classes
+        self.lstm_layers = lstm_layers
+        self.bidirectional = bidirectional
+        self.dropout = dropout
+        self.concat_layers = concat_layers
+        self.pooling = pooling
+        self.freeze_bert = freeze_bert
+        self.freeze_n_layers = freeze_n_layers
+        self.freeze_from_start = freeze_from_start
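Not part of the commit: a short sketch of how this config could be instantiated. The field names and defaults come from the class above; all argument values below are illustrative assumptions, not values read from the published checkpoints.

# Illustrative only: a 3-class BiLSTM head on top of NusaBERT.
from bert_lstm_classifier import ConfigHybridBiLSTMModel

config = ConfigHybridBiLSTMModel(
    bert_model_name="LazarusNLP/NusaBERT-large",  # assumed backbone, as used in app.py
    num_classes=3,          # Negative / Neutral / Positive, matching CLASS_MAP
    hidden_dim=128,
    concat_layers=4,        # assumed: concatenate the last 4 BERT hidden layers
    pooling="mean",         # assumed: the class default is "last"
    freeze_bert=True,
    freeze_n_layers=6,      # assumed: freeze 6 encoder layers
    freeze_from_start=True,
)
print(config.model_type)    # "bert-bilstm"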
bert_lstm_classifier/model.py ADDED
@@ -0,0 +1,175 @@
+import torch
+import torch.nn as nn
+from transformers import BertModel, PreTrainedModel
+from transformers.modeling_outputs import SequenceClassifierOutput
+from .config import ConfigHybridBiLSTMModel
+
+import logging
+
+class BERTBiLSTMClassifier(PreTrainedModel):
+    """
+    BERT + BiLSTM + classifier head for sequence classification tasks.
+    """
+    config_class = ConfigHybridBiLSTMModel
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.config = config
+        # Setup logging configuration
+        logging.basicConfig(level=logging.INFO,  # INFO level for production
+                            format='%(asctime)s - %(levelname)s - %(message)s',
+                            handlers=[logging.StreamHandler()])
+
+        logger = logging.getLogger(__name__)
+        self.logger = logger
+
+        # ===== Load BERT backbone =====
+        self.bert = BertModel.from_pretrained(config.bert_model_name, output_hidden_states=True)
+        logger.info("Model initialized with BERT model: %s", config.bert_model_name)
+        # ===== Freeze BERT parameters if needed =====
+        if config.freeze_bert:
+            assert hasattr(self.bert, "encoder"), "BERT model must have encoder layers"
+            total_layers = len(self.bert.encoder.layer)
+            # Validate freeze_n_layers
+            if config.freeze_n_layers > total_layers or config.freeze_n_layers < 0:
+                raise ValueError(f"freeze_n_layers ({config.freeze_n_layers}) is out of valid range (0-{total_layers})")
+            # Select which layers to freeze
+            if config.freeze_from_start:
+                layers_to_freeze = list(range(config.freeze_n_layers))  # freeze from the start
+                logger.info(f"Freezing the first {config.freeze_n_layers} layers of BERT.")
+            else:
+                layers_to_freeze = list(range(total_layers - config.freeze_n_layers, total_layers))  # freeze from the end
+                logger.info(f"Freezing the last {config.freeze_n_layers} layers of BERT.")
+            # Apply freezing
+            for idx, layer in enumerate(self.bert.encoder.layer):
+                if idx in layers_to_freeze:
+                    for param in layer.parameters():
+                        param.requires_grad = False
+                else:
+                    for param in layer.parameters():
+                        param.requires_grad = True
+        # ===== Define BiLSTM layer =====
+        # Update input_size to account for concatenation:
+        # BERT's hidden_size multiplied by the number of concatenated layers
+        input_size = self.bert.config.hidden_size * config.concat_layers if config.concat_layers > 0 else self.bert.config.hidden_size
+        self.lstm = nn.LSTM(
+            input_size=input_size,
+            hidden_size=config.hidden_dim,
+            num_layers=config.lstm_layers,
+            bidirectional=config.bidirectional,
+            batch_first=True
+        )
+        # ===== Define dropout layer =====
+        self.dropout = nn.Dropout(config.dropout)
+        # ===== Define final classification head =====
+        self.classifier = nn.Linear(
+            config.hidden_dim * 2 if config.bidirectional else config.hidden_dim,
+            config.num_classes
+        )
+        # ===== Define loss function =====
+        self.loss_fn = nn.CrossEntropyLoss()
+        # ===== Print model summary =====
+        self._print_trainable_parameters()
+        self.post_init()
+
+    def _print_trainable_parameters(self):
+        """
+        Utility function to print the number of total and trainable parameters.
+        """
+        total_params = sum(p.numel() for p in self.parameters())
+        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+        print("="*50)
+        print(f"Total Parameters: {total_params:,}")
+        print(f"Trainable Parameters: {trainable_params:,}")
+        print(f"Frozen Parameters: {total_params - trainable_params:,}")
+        print("="*50)
+
+    def _print_named_parameters(self):
+        """
+        Utility function to print each parameter name and whether it's trainable.
+        """
+        print("="*70)
+        print(f"{'Parameter Name':45} | {'Trainable'}")
+        print("-"*70)
+        for name, param in self.named_parameters():
+            print(f"{name:45} | {'Yes' if param.requires_grad else 'No'}")
+        print("="*70)
+
+    def _get_freeze_summary(self):
+        """
+        Returns a summary of frozen and trainable layers in the BERT model.
+        """
+        summary = []
+        for idx, layer in enumerate(self.bert.encoder.layer):
+            layer_info = {
+                "layer": f"bert.encoder.layer.{idx}",
+                "trainable": any(param.requires_grad for param in layer.parameters())
+            }
+            summary.append(layer_info)
+
+        return summary
+
+
+    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
+        """
+        Forward pass through BERT -> BiLSTM -> Pooling -> Classifier.
+        """
+        # ===== BERT forward pass =====
+        bert_output = self.bert(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids
+        )
+        # ===== Handle concatenation of the last hidden states if configured =====
+        if self.config.concat_layers > 0:
+            hidden_states = bert_output.hidden_states
+            concat_layers = min(self.config.concat_layers, len(hidden_states))
+            selected_layers = hidden_states[-concat_layers:]
+            sequence_output = torch.cat(selected_layers, dim=-1)
+        else:
+            sequence_output = bert_output.last_hidden_state
+        # ===== Pass through BiLSTM =====
+        lstm_output, _ = self.lstm(sequence_output)
+        # ===== Pooling layer (CLS / Last / Mean / Max) =====
+        if self.config.pooling == "cls":
+            pooled_output = lstm_output[:, 0, :]
+
+        elif self.config.pooling == "last":
+            pooled_output = lstm_output[:, -1, :]
+
+        elif self.config.pooling == "mean":
+            if attention_mask is not None:
+                mask = attention_mask.unsqueeze(-1).expand(lstm_output.size())
+                masked_output = lstm_output * mask
+                sum_masked_output = masked_output.sum(dim=1)
+                lengths = mask.sum(dim=1).clamp(min=1e-9)
+                pooled_output = sum_masked_output / lengths
+            else:
+                pooled_output = lstm_output.mean(dim=1)
+
+        elif self.config.pooling == "max":
+            if attention_mask is not None:
+                mask = attention_mask.unsqueeze(-1).expand(lstm_output.size())
+                masked_output = lstm_output.masked_fill(mask == 0, -1e9)
+                pooled_output, _ = masked_output.max(dim=1)
+            else:
+                pooled_output, _ = lstm_output.max(dim=1)
+
+        else:
+            raise ValueError(f"Unknown pooling type: {self.config.pooling}")
+
+        # ===== Dropout + Classification Head =====
+        pooled_output = self.dropout(pooled_output)
+        logits = self.classifier(pooled_output)
+
+        # ===== Compute loss if labels provided =====
+        loss = None
+        if labels is not None:
+            loss = self.loss_fn(logits, labels)
+            return SequenceClassifierOutput(
+                loss=loss,
+                logits=logits
+            )
+        else:
+            return {"logits": logits}
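Finally, also not from the commit: a minimal end-to-end sketch that builds the classifier from a freshly constructed config and runs one forward pass. The backbone name and max_length are assumptions carried over from app.py and the config defaults; with no labels passed, forward returns a plain dict containing "logits".

# Illustrative only: config -> model -> single forward pass.
import torch
from transformers import AutoTokenizer
from bert_lstm_classifier import BERTBiLSTMClassifier, ConfigHybridBiLSTMModel

config = ConfigHybridBiLSTMModel(
    bert_model_name="LazarusNLP/NusaBERT-large",  # assumed BERT-compatible backbone
    num_classes=3,
)
model = BERTBiLSTMClassifier(config)  # downloads the backbone and prints the parameter summary
model.eval()

tokenizer = AutoTokenizer.from_pretrained(config.bert_model_name)
batch = tokenizer("example sentence", padding="max_length", truncation=True,
                  max_length=128, return_tensors="pt")

with torch.no_grad():
    out = model(**batch)       # no labels, so the model returns {"logits": ...}
print(out["logits"].shape)     # expected: torch.Size([1, 3])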