MatteoFasulo committed
Commit fa2873b · verified · 1 Parent(s): a101237

Update README.md

Files changed (1):
  1. README.md +87 -19
README.md CHANGED
@@ -24,7 +24,7 @@ language:
 - ro
 - uk
 datasets:
- - clef-2025-checkthat-lab-task-1-subjectivity
+ - MatteoFasulo/clef2025_checkthat_task1_subjectivity
 pipeline_tag: text-classification
 model-index:
 - name: mdeberta-v3-base-subjectivity-sentiment-multilingual
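
Note on the dataset rename above: the updated metadata points at a Hub dataset repo, so the data can be pulled directly with the `datasets` library. A minimal sketch; the repo id comes from the diff, while the available splits and column names are not shown here, so inspect the loaded object before use:

```python
from datasets import load_dataset

# Repo id taken from the updated model-card metadata above;
# split and column names are assumptions to verify on the dataset card.
ds = load_dataset("MatteoFasulo/clef2025_checkthat_task1_subjectivity")
print(ds)  # inspect available splits and columns before use
```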
@@ -84,20 +84,85 @@ Training and development datasets were provided for Arabic, German, English, Ita
 You can use this model directly with the Hugging Face `transformers` library to classify text:
 
 ```python
- from transformers import pipeline
-
- classifier = pipeline(
-     "text-classification",
-     model="MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual"
+ import torch
+ import torch.nn as nn
+ from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel, pipeline
+ from transformers.models.deberta.modeling_deberta import ContextPooler
+
+ # Multilingual sentiment model whose probabilities are fed to the subjectivity classifier.
+ sent_pipe = pipeline(
+     "sentiment-analysis",
+     model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+     tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+     top_k=None,  # return all 3 sentiment scores
 )
 
- text_objective = "The quick brown fox jumps over the lazy dog."
- text_subjective = "I strongly believe this is an amazing product and everyone should buy it!"
- text_german_subj = "Ich bin der Meinung, dass dies ein unglaubliches Produkt ist."  # German: I am of the opinion that this is an incredible product.
-
- print(f"'{text_objective}' -> {classifier(text_objective)}")
- print(f"'{text_subjective}' -> {classifier(text_subjective)}")
- print(f"'{text_german_subj}' -> {classifier(text_german_subj)}")
+ class CustomModel(PreTrainedModel):
+     config_class = DebertaV2Config
+
+     def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
+         super().__init__(config, *args, **kwargs)
+         self.deberta = DebertaV2Model(config)
+         self.pooler = ContextPooler(config)
+         output_dim = self.pooler.output_dim
+         self.dropout = nn.Dropout(0.1)
+         # The classifier sees the pooled text embedding plus the three sentiment scores.
+         self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)
+
+     def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
+         outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
+         encoder_layer = outputs[0]
+         pooled_output = self.pooler(encoder_layer)
+         # Late fusion: concatenate the sentiment distribution with the pooled representation.
+         sentiment_features = torch.stack((positive, neutral, negative), dim=1).to(pooled_output.dtype)
+         combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
+         logits = self.classifier(self.dropout(combined_features))
+         return {'logits': logits}
+
+ model_name = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual"
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")
+ config = DebertaV2Config.from_pretrained(
+     model_name,
+     num_labels=2,
+     id2label={0: 'OBJ', 1: 'SUBJ'},
+     label2id={'OBJ': 0, 'SUBJ': 1},
+     output_attentions=False,
+     output_hidden_states=False
+ )
+ model = CustomModel.from_pretrained(model_name, config=config)
+
+ def classify_subjectivity(text: str):
+     # get the full sentiment distribution
+     dist = sent_pipe(text)[0]
+     pos = next(d["score"] for d in dist if d["label"] == "positive")
+     neu = next(d["score"] for d in dist if d["label"] == "neutral")
+     neg = next(d["score"] for d in dist if d["label"] == "negative")
+
+     # tokenize the text
+     inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
+
+     # run the classifier, feeding in the three sentiment scores
+     with torch.no_grad():
+         outputs = model(
+             input_ids=inputs["input_ids"],
+             attention_mask=inputs["attention_mask"],
+             positive=torch.tensor(pos).unsqueeze(0).float(),
+             neutral=torch.tensor(neu).unsqueeze(0).float(),
+             negative=torch.tensor(neg).unsqueeze(0).float()
+         )
+
+     # compute probabilities and pick the top label
+     probs = torch.softmax(outputs['logits'][0], dim=-1)
+     label = model.config.id2label[int(probs.argmax())]
+     score = probs.max().item()
+
+     return {"label": label, "score": score}
+
+ examples = [
+     "The company reported a 10% increase in revenue for the last quarter.",
+     "Die angegebenen Fehlerquoten können daher nur für symptomatische Patienten gelten.",  # German: The stated error rates can therefore only apply to symptomatic patients.
+     "Si smonta qui definitivamente la narrazione per cui le scelte energetiche possono essere frutto esclusivo di valutazioni “tecniche” e non politiche.",  # Italian: This definitively dismantles the narrative that energy choices can be purely "technical" rather than political.
+ ]
+ for text in examples:
+     result = classify_subjectivity(text)
+     print(f"Text: {text}")
+     print(f"→ Subjectivity: {result['label']} (score={result['score']:.2f})\n")
 ```
 
 ## Training procedure
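
The usage example in the hunk above scores one text at a time. For batches, here is a minimal sketch reusing its `sent_pipe`, `tokenizer`, and `model` objects; the `classify_subjectivity_batch` helper is an illustrative assumption, not part of the model card:

```python
def classify_subjectivity_batch(texts):
    # One sentiment distribution (a list of {label, score} dicts) per input text.
    dists = sent_pipe(texts)
    pos = torch.tensor([next(d["score"] for d in dist if d["label"] == "positive") for dist in dists])
    neu = torch.tensor([next(d["score"] for d in dist if d["label"] == "neutral") for dist in dists])
    neg = torch.tensor([next(d["score"] for d in dist if d["label"] == "negative") for dist in dists])

    # Pad to a common length so the whole batch runs in a single forward pass.
    inputs = tokenizer(texts, padding=True, truncation=True, max_length=256, return_tensors="pt")
    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            positive=pos.float(),
            neutral=neu.float(),
            negative=neg.float(),
        )
    probs = torch.softmax(outputs["logits"], dim=-1)
    return [{"label": model.config.id2label[int(p.argmax())], "score": p.max().item()} for p in probs]
```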
@@ -133,13 +198,16 @@ The following hyperparameters were used during training:
 
 ## Citation
 
- If you find our work helpful or inspiring, please consider citing the associated paper:
+ If you find our work helpful or inspiring, please feel free to cite it:
 
 ```bibtex
- @article{fasulo2025ai,
-   title={AI Wizards at CheckThat! 2025: Enhancing Transformer-Based Embeddings with Sentiment for Subjectivity Detection in News Articles},
-   author={Fasulo, Matteo and Bonal, Matteo and Hettich, Noah and Hettich, Elias and Jabbari, Mahdi},
-   journal={arXiv preprint arXiv:2507.11764},
-   year={2025}
+ @misc{fasulo2025aiwizardscheckthat2025,
+   title={AI Wizards at CheckThat! 2025: Enhancing Transformer-Based Embeddings with Sentiment for Subjectivity Detection in News Articles},
+   author={Matteo Fasulo and Luca Babboni and Luca Tedeschini},
+   year={2025},
+   eprint={2507.11764},
+   archivePrefix={arXiv},
+   primaryClass={cs.CL},
+   url={https://arxiv.org/abs/2507.11764},
 }
 ```
 