add hybrid models
- app.py +22 -3
- bert_gru_classifier/__init__.py +2 -0
- bert_gru_classifier/config.py +32 -0
- bert_gru_classifier/model.py +175 -0
app.py
CHANGED
@@ -1,7 +1,26 @@
 import gradio as gr
+import torch
+from bert_gru_classifier import BERTBiGRUClassifier
+from transformers import AutoTokenizer
 
-
-
+model = BERTBiGRUClassifier.from_pretrained("Amal17/NusaBERT-BiGRU-NusaX-ace")
+tokenizer = AutoTokenizer.from_pretrained("LazarusNLP/NusaBERT-large")
+model.eval()
 
-
+def run(input):
+    # Tokenize as PyTorch tensors so the batch can be unpacked straight into the model
+    text_tokenized = tokenizer(
+        input,
+        padding="max_length",
+        truncation=True,
+        max_length=128, return_tensors="pt"
+    )
+
+    outputs = model(**text_tokenized)
+    logits = outputs['logits']
+    probs = torch.nn.functional.softmax(logits, dim=1)
+    preds = torch.argmax(probs, dim=1).tolist()
+
+    return "Hello " + str(preds) + "!!"
+
+demo = gr.Interface(fn=run, inputs="text", outputs="text")
 demo.launch()
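The same inference path can be exercised locally without launching the Gradio UI; a minimal sketch, assuming the Space's requirements (torch, transformers, and this repo's bert_gru_classifier package) are installed, with a placeholder input string:

    import torch
    from transformers import AutoTokenizer
    from bert_gru_classifier import BERTBiGRUClassifier

    # Same checkpoint and tokenizer as app.py
    model = BERTBiGRUClassifier.from_pretrained("Amal17/NusaBERT-BiGRU-NusaX-ace")
    tokenizer = AutoTokenizer.from_pretrained("LazarusNLP/NusaBERT-large")
    model.eval()

    batch = tokenizer("example input text", truncation=True, max_length=128, return_tensors="pt")
    with torch.no_grad():
        logits = model(**batch)["logits"]
    print(logits.softmax(dim=1), logits.argmax(dim=1).item())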
bert_gru_classifier/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .config import ConfigHybridBiGRUModel
+from .model import BERTBiGRUClassifier
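Optionally, the exported classes could also be registered with the transformers Auto factories so the checkpoint loads via AutoModelForSequenceClassification; this registration is only a sketch and is not part of this commit:

    from transformers import AutoConfig, AutoModelForSequenceClassification
    from bert_gru_classifier import ConfigHybridBiGRUModel, BERTBiGRUClassifier

    # Map the custom "bert-bigru" model_type to the config and model classes
    AutoConfig.register("bert-bigru", ConfigHybridBiGRUModel)
    AutoModelForSequenceClassification.register(ConfigHybridBiGRUModel, BERTBiGRUClassifier)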
bert_gru_classifier/config.py
ADDED
@@ -0,0 +1,32 @@
+from transformers import PretrainedConfig
+
+class ConfigHybridBiGRUModel(PretrainedConfig):
+    model_type = "bert-bigru"
+
+    def __init__(self,
+                 bert_model_name="bert-base-uncased",
+                 tokenizer_name="bert-base-uncased",
+                 hidden_dim=128,
+                 num_classes=2,
+                 gru_layers=1,
+                 bidirectional=True,
+                 dropout=0.3,
+                 concat_layers=0,
+                 pooling="last",
+                 freeze_bert=False,
+                 freeze_n_layers=0,        # number of BERT layers to freeze
+                 freeze_from_start=False,  # freeze from the start or from the end
+                 **kwargs):
+        super().__init__(**kwargs)
+        self.bert_model_name = bert_model_name
+        self.tokenizer_name = tokenizer_name
+        self.hidden_dim = hidden_dim
+        self.num_classes = num_classes
+        self.gru_layers = gru_layers
+        self.bidirectional = bidirectional
+        self.dropout = dropout
+        self.concat_layers = concat_layers
+        self.pooling = pooling
+        self.freeze_bert = freeze_bert
+        self.freeze_n_layers = freeze_n_layers
+        self.freeze_from_start = freeze_from_start
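For reference, a config instance can be created and serialized like any PretrainedConfig; the values below are illustrative and are not the hyperparameters of the pushed checkpoint:

    from bert_gru_classifier import ConfigHybridBiGRUModel

    config = ConfigHybridBiGRUModel(
        bert_model_name="LazarusNLP/NusaBERT-large",
        tokenizer_name="LazarusNLP/NusaBERT-large",
        hidden_dim=128,
        num_classes=3,        # e.g. a 3-way sentiment label set
        pooling="last",
    )
    config.save_pretrained("my-bert-bigru")   # writes config.json with model_type "bert-bigru"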
bert_gru_classifier/model.py
ADDED
@@ -0,0 +1,175 @@
+import torch
+import torch.nn as nn
+from transformers import BertModel, PreTrainedModel
+from transformers.modeling_outputs import SequenceClassifierOutput
+from .config import ConfigHybridBiGRUModel
+
+import logging
+
+class BERTBiGRUClassifier(PreTrainedModel):
+    """
+    BERT + BiGRU + classifier head for sequence classification tasks.
+    """
+    config_class = ConfigHybridBiGRUModel
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.config = config
+        # Setup logging configuration
+        logging.basicConfig(level=logging.INFO,  # INFO level for production
+                            format='%(asctime)s - %(levelname)s - %(message)s',
+                            handlers=[logging.StreamHandler()])
+
+        logger = logging.getLogger(__name__)
+        self.logger = logger
+
+        # ===== Load BERT backbone =====
+        self.bert = BertModel.from_pretrained(config.bert_model_name, output_hidden_states=True)
+        logger.info("Model initialized with BERT model: %s", config.bert_model_name)
+        # ===== Freeze BERT parameters if needed =====
+        if config.freeze_bert:
+            assert hasattr(self.bert, "encoder"), "BERT model must have encoder layers"
+            total_layers = len(self.bert.encoder.layer)
+            # Validate freeze_n_layers
+            if config.freeze_n_layers > total_layers or config.freeze_n_layers < 0:
+                raise ValueError(f"freeze_n_layers ({config.freeze_n_layers}) is out of valid range (0-{total_layers})")
+            # Select which layers to freeze
+            if config.freeze_from_start:
+                layers_to_freeze = list(range(config.freeze_n_layers))  # freeze from the start
+                logger.info(f"Freezing the first {config.freeze_n_layers} layers of BERT.")
+            else:
+                layers_to_freeze = list(range(total_layers - config.freeze_n_layers, total_layers))  # freeze from the end
+                logger.info(f"Freezing the last {config.freeze_n_layers} layers of BERT.")
+            # Apply freezing
+            for idx, layer in enumerate(self.bert.encoder.layer):
+                if idx in layers_to_freeze:
+                    for param in layer.parameters():
+                        param.requires_grad = False
+                else:
+                    for param in layer.parameters():
+                        param.requires_grad = True
+        # ===== Define BiGRU layer =====
+        # Input size is the BERT hidden size (e.g. 768), multiplied by the
+        # number of concatenated hidden layers when concat_layers > 0
+        input_size = self.bert.config.hidden_size * config.concat_layers if config.concat_layers > 0 else self.bert.config.hidden_size
+        self.gru = nn.GRU(
+            input_size=input_size,
+            hidden_size=config.hidden_dim,
+            num_layers=config.gru_layers,
+            bidirectional=config.bidirectional,
+            batch_first=True,
+        )
+        # ===== Define dropout layer =====
+        self.dropout = nn.Dropout(config.dropout)
+        # ===== Define final classification head =====
+        self.classifier = nn.Linear(
+            config.hidden_dim * 2 if config.bidirectional else config.hidden_dim,
+            config.num_classes
+        )
+        # ===== Define loss function =====
+        self.loss_fn = nn.CrossEntropyLoss()
+        # ===== Print model summary =====
+        self._print_trainable_parameters()
+        self.post_init()
+
+    def _print_trainable_parameters(self):
+        """
+        Utility function to print the number of total and trainable parameters.
+        """
+        total_params = sum(p.numel() for p in self.parameters())
+        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+        print("="*50)
+        print(f"Total Parameters: {total_params:,}")
+        print(f"Trainable Parameters: {trainable_params:,}")
+        print(f"Frozen Parameters: {total_params - trainable_params:,}")
+        print("="*50)
+
+    def _print_named_parameters(self):
+        """
+        Utility function to print each parameter name and whether it's trainable.
+        """
+        print("="*70)
+        print(f"{'Parameter Name':45} | {'Trainable'}")
+        print("-"*70)
+        for name, param in self.named_parameters():
+            print(f"{name:45} | {'Yes' if param.requires_grad else 'No'}")
+        print("="*70)
+
+    def _get_freeze_summary(self):
+        """
+        Returns a summary of frozen and trainable layers in the BERT model.
+        """
+        summary = []
+        for idx, layer in enumerate(self.bert.encoder.layer):
+            layer_info = {
+                "layer": f"bert.encoder.layer.{idx}",
+                "trainable": any(param.requires_grad for param in layer.parameters())
+            }
+            summary.append(layer_info)
+
+        return summary
+
+    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
+        """
+        Forward pass through BERT -> BiGRU -> Pooling -> Classifier.
+        """
+        # ===== BERT forward pass =====
+        bert_output = self.bert(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids
+        )
+        # ===== Concatenate the last hidden states if configured =====
+        if self.config.concat_layers > 0:
+            hidden_states = bert_output.hidden_states
+            concat_layers = min(self.config.concat_layers, len(hidden_states))
+            selected_layers = hidden_states[-concat_layers:]
+            sequence_output = torch.cat(selected_layers, dim=-1)
+        else:
+            sequence_output = bert_output.last_hidden_state
+        # ===== Pass through BiGRU =====
+        gru_output, _ = self.gru(sequence_output)
+        # ===== Pooling layer (CLS / Last / Mean / Max) =====
+        if self.config.pooling == "cls":
+            pooled_output = gru_output[:, 0, :]
+
+        elif self.config.pooling == "last":
+            pooled_output = gru_output[:, -1, :]
+
+        elif self.config.pooling == "mean":
+            if attention_mask is not None:
+                mask = attention_mask.unsqueeze(-1).expand(gru_output.size())
+                masked_output = gru_output * mask
+                sum_masked_output = masked_output.sum(dim=1)
+                lengths = mask.sum(dim=1).clamp(min=1e-9)
+                pooled_output = sum_masked_output / lengths
+            else:
+                pooled_output = gru_output.mean(dim=1)
+
+        elif self.config.pooling == "max":
+            if attention_mask is not None:
+                mask = attention_mask.unsqueeze(-1).expand(gru_output.size())
+                masked_output = gru_output.masked_fill(mask == 0, -1e9)
+                pooled_output, _ = masked_output.max(dim=1)
+            else:
+                pooled_output, _ = gru_output.max(dim=1)
+
+        else:
+            raise ValueError(f"Unknown pooling type: {self.config.pooling}")
+
+        # ===== Dropout + Classification Head =====
+        pooled_output = self.dropout(pooled_output)
+        logits = self.classifier(pooled_output)
+
+        # ===== Compute loss if labels provided =====
+        loss = None
+        if labels is not None:
+            loss = self.loss_fn(logits, labels)
+            return SequenceClassifierOutput(
+                loss=loss,
+                logits=logits
+            )
+        else:
+            return {"logits": logits}
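A shape-level smoke test of the classifier with random token ids; a sketch assuming the default bert-base-uncased backbone (the GRU and classifier weights are untrained here):

    import torch
    from bert_gru_classifier import ConfigHybridBiGRUModel, BERTBiGRUClassifier

    config = ConfigHybridBiGRUModel(num_classes=3)   # defaults: bert-base-uncased backbone, pooling="last"
    model = BERTBiGRUClassifier(config)
    model.eval()

    input_ids = torch.randint(0, model.bert.config.vocab_size, (2, 16))
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        out = model(input_ids=input_ids, attention_mask=attention_mask)
    print(out["logits"].shape)                       # torch.Size([2, 3])

    # With labels, forward() returns a SequenceClassifierOutput carrying the loss
    out = model(input_ids=input_ids, attention_mask=attention_mask, labels=torch.tensor([0, 2]))
    print(out.loss)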