MatteoFasulo committed on
Commit
b857a60
·
verified ·
1 Parent(s): 5b59331

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +100 -21
README.md CHANGED
@@ -25,6 +25,18 @@ tags:
25
  model-index:
26
  - name: mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic
27
  results: []
 
 
 
 
 
 
 
 
 
 
 
 
28
  ---
29
 
30
  # mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic
@@ -118,35 +130,102 @@ The following hyperparameters were used during training:
118
  You can use this model directly with the Hugging Face `transformers` library for text classification:
119
 
120
  ```python
121
- from transformers import pipeline
122
-
123
- model_name = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
124
- classifier = pipeline("text-classification", model=model_name)
125
-
126
- # Example usage
127
- text_subjective = "This is a truly amazing product and I highly recommend it!"
128
- result_subjective = classifier(text_subjective)
129
- print(f"'{text_subjective}' -> {result_subjective}")
130
- # Expected output: [{'label': 'SUBJ', 'score': 0.99...}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- text_objective = "The capital of France is Paris."
133
- result_objective = classifier(text_objective)
134
- print(f"'{text_objective}' -> {result_objective}")
135
- # Expected output: [{'label': 'OBJ', 'score': 0.98...}]
136
  ```
137
 
138
  For more detailed usage, including training and evaluation scripts, please refer to the [GitHub repository](https://github.com/MatteoFasulo/clef2025-checkthat).
139
 
140
  ## Citation
141
 
142
- If you find this model or the associated research useful, please consider citing the original paper:
143
 
144
  ```bibtex
145
- @article{aiwizards2025checkthat,
146
- title={AI Wizards at CheckThat! 2025: Enhancing Transformer-Based Embeddings with Sentiment for Subjectivity Detection in News Articles},
147
- author={AI Wizards team}, # Authors not fully listed in provided context, please refer to the full paper.
148
- journal={arXiv preprint arXiv:2507.11764},
149
- year={2025},
150
- url={https://arxiv.org/abs/2507.11764}
 
 
151
  }
152
  ```
 
25
  model-index:
26
  - name: mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic
27
  results: []
28
+ datasets:
29
+ - MatteoFasulo/clef2025_checkthat_task1_subjectivity
30
+ language:
31
+ - ar
32
+ - de
33
+ - bg
34
+ - el
35
+ - it
36
+ - ro
37
+ - uk
38
+ - en
39
+ - pl
40
  ---
41
 
42
  # mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic
 
130
  You can use this model directly with the Hugging Face `transformers` library for text classification:
131
 
132
  ```python
133
+ import torch
134
+ import torch.nn as nn
135
+ from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel, pipeline, AutoModelForSequenceClassification
136
+ from transformers.models.deberta.modeling_deberta import ContextPooler
137
+
138
+ sent_pipe = pipeline(
139
+ "sentiment-analysis",
140
+ model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
141
+ tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
142
+ top_k=None, # return all 3 sentiment scores
143
+ )
144
+
145
+ class CustomModel(PreTrainedModel):
146
+ config_class = DebertaV2Config
147
+ def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
148
+ super().__init__(config, *args, **kwargs)
149
+ self.deberta = DebertaV2Model(config)
150
+ self.pooler = ContextPooler(config)
151
+ output_dim = self.pooler.output_dim
152
+ self.dropout = nn.Dropout(0.1)
153
+ self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)
154
+
155
+ def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
156
+ outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
157
+ encoder_layer = outputs[0]
158
+ pooled_output = self.pooler(encoder_layer)
159
+ sentiment_features = torch.stack((positive, neutral, negative), dim=1).to(pooled_output.dtype)
160
+ combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
161
+ logits = self.classifier(self.dropout(combined_features))
162
+ return {'logits': logits}
163
+
164
+ model_name = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
165
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")
166
+ config = DebertaV2Config.from_pretrained(
167
+ model_name,
168
+ num_labels=2,
169
+ id2label={0: 'OBJ', 1: 'SUBJ'},
170
+ label2id={'OBJ': 0, 'SUBJ': 1},
171
+ output_attentions=False,
172
+ output_hidden_states=False
173
+ )
174
+ model = CustomModel(config=config, sentiment_dim=3, num_labels=2).from_pretrained(model_name)
175
+
176
+ def classify_subjectivity(text: str):
177
+ # get full sentiment distribution
178
+ dist = sent_pipe(text)[0]
179
+ pos = next(d["score"] for d in dist if d["label"] == "positive")
180
+ neu = next(d["score"] for d in dist if d["label"] == "neutral")
181
+ neg = next(d["score"] for d in dist if d["label"] == "negative")
182
+
183
+ # tokenize the text
184
+ inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
185
+
186
+ # feeding in the three sentiment scores
187
+ with torch.no_grad():
188
+ outputs = model(
189
+ input_ids=inputs["input_ids"],
190
+ attention_mask=inputs["attention_mask"],
191
+ positive=torch.tensor(pos).unsqueeze(0).float(),
192
+ neutral=torch.tensor(neu).unsqueeze(0).float(),
193
+ negative=torch.tensor(neg).unsqueeze(0).float()
194
+ )
195
+
196
+ # compute probabilities and pick the top label
197
+ probs = torch.softmax(outputs.get('logits')[0], dim=-1)
198
+ label = model.config.id2label[int(probs.argmax())]
199
+ score = probs.max().item()
200
+
201
+ return {"label": label, "score": score}
202
+
203
+ examples = [
204
+ "The company reported a 10% increase in revenue for the last quarter.",
205
+ "Die angegebenen Fehlerquoten können daher nur für symptomatische Patienten gelten.",
206
+ "Si smonta qui definitivamente la narrazione per cui le scelte energetiche possono essere frutto esclusivo di valutazioni “tecniche” e non politiche.",
207
+ ]
208
+ for text in examples:
209
+ result = classify_subjectivity(text)
210
+ print(f"Text: {text}")
211
+ print(f"→ Subjectivity: {result['label']} (score={result['score']:.2f})\n")
212
 
 
 
 
 
213
  ```
214
 
215
  For more detailed usage, including training and evaluation scripts, please refer to the [GitHub repository](https://github.com/MatteoFasulo/clef2025-checkthat).
216
 
217
  ## Citation
218
 
219
+ If you find our work helpful or inspiring, please feel free to cite it:
220
 
221
  ```bibtex
222
+ @misc{fasulo2025aiwizardscheckthat2025,
223
+ title={AI Wizards at CheckThat! 2025: Enhancing Transformer-Based Embeddings with Sentiment for Subjectivity Detection in News Articles},
224
+ author={Matteo Fasulo and Luca Babboni and Luca Tedeschini},
225
+ year={2025},
226
+ eprint={2507.11764},
227
+ archivePrefix={arXiv},
228
+ primaryClass={cs.CL},
229
+ url={https://arxiv.org/abs/2507.11764},
230
  }
231
  ```