Update README.md
README.md CHANGED
@@ -24,7 +24,7 @@ language:
 - ro
 - uk
 datasets:
--
+- MatteoFasulo/clef2025_checkthat_task1_subjectivity
 pipeline_tag: text-classification
 model-index:
 - name: mdeberta-v3-base-subjectivity-sentiment-multilingual
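The new `datasets:` entry points the model card at the CLEF 2025 CheckThat! Task 1 subjectivity corpus on the Hub. A minimal sketch of pulling it down with the standard `datasets` API (the available configs and splits are not stated in this diff, so the code inspects them rather than assuming names):

```python
from datasets import load_dataset

# Load the corpus referenced by the new frontmatter entry, then print it
# to discover its actual configs/splits instead of hard-coding them.
ds = load_dataset("MatteoFasulo/clef2025_checkthat_task1_subjectivity")
print(ds)
```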
@@ -84,20 +84,85 @@ Training and development datasets were provided for Arabic, German, English, Ita
 You can use this model directly with the Hugging Face `transformers` library to classify text:

 ```python
-
-
-
-
-
+import torch
+import torch.nn as nn
+from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel, pipeline, AutoModelForSequenceClassification
+from transformers.models.deberta.modeling_deberta import ContextPooler
+
+sent_pipe = pipeline(
+    "sentiment-analysis",
+    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+    tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+    top_k=None, # return all 3 sentiment scores
 )

-
-
-
-
-
-
-
+class CustomModel(PreTrainedModel):
+    config_class = DebertaV2Config
+    def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
+        super().__init__(config, *args, **kwargs)
+        self.deberta = DebertaV2Model(config)
+        self.pooler = ContextPooler(config)
+        output_dim = self.pooler.output_dim
+        self.dropout = nn.Dropout(0.1)
+        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)
+
+    def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
+        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
+        encoder_layer = outputs[0]
+        pooled_output = self.pooler(encoder_layer)
+        sentiment_features = torch.stack((positive, neutral, negative), dim=1).to(pooled_output.dtype)
+        combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
+        logits = self.classifier(self.dropout(combined_features))
+        return {'logits': logits}
+
+model_name = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual"
+tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")
+config = DebertaV2Config.from_pretrained(
+    model_name,
+    num_labels=2,
+    id2label={0: 'OBJ', 1: 'SUBJ'},
+    label2id={'OBJ': 0, 'SUBJ': 1},
+    output_attentions=False,
+    output_hidden_states=False
+)
+model = CustomModel(config=config, sentiment_dim=3, num_labels=2).from_pretrained(model_name)
+
+def classify_subjectivity(text: str):
+    # A) get the full sentiment distribution
+    dist = sent_pipe(text)[0]
+    pos = next(d["score"] for d in dist if d["label"] == "positive")
+    neu = next(d["score"] for d in dist if d["label"] == "neutral")
+    neg = next(d["score"] for d in dist if d["label"] == "negative")
+
+    # B) tokenize the text
+    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
+
+    # C) forward pass, feeding in the three sentiment scores
+    with torch.no_grad():
+        outputs = model(
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
+            positive=torch.tensor(pos).unsqueeze(0).float(),
+            neutral=torch.tensor(neu).unsqueeze(0).float(),
+            negative=torch.tensor(neg).unsqueeze(0).float()
+        )
+
+    # D) compute probabilities and pick the top label
+    probs = torch.softmax(outputs.get('logits')[0], dim=-1)
+    label = model.config.id2label[int(probs.argmax())]
+    score = probs.max().item()
+
+    return {"label": label, "score": score}
+
+examples = [
+    "The company reported a 10% increase in revenue for the last quarter.",
+    "Die angegebenen Fehlerquoten können daher nur für symptomatische Patienten gelten.",
+    "Si smonta qui definitivamente la narrazione per cui le scelte energetiche possono essere frutto esclusivo di valutazioni “tecniche” e non politiche.",
+]
+for text in examples:
+    result = classify_subjectivity(text)
+    print(f"Text: {text}")
+    print(f"→ Subjectivity: {result['label']} (score={result['score']:.2f})\n")
 ```

 ## Training procedure
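Worth noting about the example added above: the model's `forward` concatenates the three sentiment scores from the XLM-R pipeline onto DeBERTa's pooled representation before the classifier, so every input must pass through `sent_pipe` first. As a hypothetical follow-on sketch (not part of this diff; it reuses the `sent_pipe`, `tokenizer`, and `model` objects defined in the added code), the same logic extends to batches:

```python
import torch

# Hypothetical batched variant of classify_subjectivity; assumes sent_pipe,
# tokenizer, and model exist as defined in the example above.
def classify_subjectivity_batch(texts):
    dists = sent_pipe(texts)  # one 3-way sentiment distribution per text
    inputs = tokenizer(texts, padding=True, truncation=True, max_length=256, return_tensors='pt')

    def scores(label):
        # Gather one sentiment score per text into a (batch,) tensor,
        # matching the shape forward() expects before torch.stack.
        return torch.tensor([next(d["score"] for d in dist if d["label"] == label) for dist in dists]).float()

    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            positive=scores("positive"),
            neutral=scores("neutral"),
            negative=scores("negative"),
        )
    probs = torch.softmax(outputs['logits'], dim=-1)
    return [{"label": model.config.id2label[int(p.argmax())], "score": p.max().item()} for p in probs]
```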
@@ -133,13 +198,16 @@ The following hyperparameters were used during training:

 ## Citation

-If you find our work helpful or inspiring, please
+If you find our work helpful or inspiring, please feel free to cite it:

 ```bibtex
-@
-
-
-
-
+@misc{fasulo2025aiwizardscheckthat2025,
+      title={AI Wizards at CheckThat! 2025: Enhancing Transformer-Based Embeddings with Sentiment for Subjectivity Detection in News Articles},
+      author={Matteo Fasulo and Luca Babboni and Luca Tedeschini},
+      year={2025},
+      eprint={2507.11764},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2507.11764},
 }
 ```