Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1 |
-
---
|
2 |
-
license: mit
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
base_model:
|
6 |
+
- google-bert/bert-base-uncased
|
7 |
+
pipeline_tag: text-classification
|
8 |
+
tags:
|
9 |
+
- multilabel-classification
|
10 |
+
- food-safety
|
11 |
+
- product-category
|
12 |
+
- hazard-category
|
13 |
+
- bert
|
14 |
+
- data-augmentation
|
15 |
+
- optuna
|
16 |
+
- interpretability
|
17 |
+
- low-resource
|
18 |
+
- imbalance-handling
|
19 |
+
model_type: bert
|
20 |
+
task:
|
21 |
+
name: "SemEval 2025 Task 9: The Food Hazard Detection Challenge - Multilabel Text Classification"
|
22 |
+
type: text-classification
|
23 |
+
link: https://food-hazard-detection-semeval-2025.github.io/
|
24 |
+
dataset:
|
25 |
+
- custom
|
26 |
+
training:
|
27 |
+
input_features: ["title", "text"]
|
28 |
+
label_names: ["product-category", "hazard-category", "product", "hazard"]
|
29 |
+
augmentation:
|
30 |
+
methods:
|
31 |
+
- lexical: [synonym-replacement, random-swap, word-deletion]
|
32 |
+
- embedding: [contextual-substitution, insertion]
|
33 |
+
- llm: [gpt-4-paraphrasing]
|
34 |
+
strategy: "quantile-based underrepresented class boosting (q=0.99)"
|
35 |
+
optimizer: AdamW
|
36 |
+
scheduler: cosine_with_restarts
|
37 |
+
hyperparameter_search: optuna
|
38 |
+
evaluation:
|
39 |
+
metrics: [f1-score]
|
40 |
+
limitations:
|
41 |
+
- Augmentation was applied to titles only; augmenting the body text as well could further improve results.
|