arxiv-tag-classifier / config.json
minemile's picture
Upload DistilBertForSequenceClassification
47fb118 verified
{
"activation": "gelu",
"architectures": [
"DistilBertForSequenceClassification"
],
"attention_dropout": 0.1,
"dim": 768,
"dropout": 0.1,
"hidden_dim": 3072,
"id2label": {
"0": "cs.AI",
"1": "cs.CL",
"2": "stat.ML",
"3": "cs.CV",
"4": "cs.LG",
"5": "cs.NE",
"6": "cs.RO",
"7": "cs.IR",
"8": "cs.IT",
"9": "cs.DB",
"10": "q-bio.NC",
"11": "cs.SI",
"12": "cs.CR",
"13": "cs.SD",
"14": "math.OC",
"15": "cs.MM",
"16": "cs.DC",
"17": "cs.HC",
"18": "cs.CY",
"19": "q-bio.QM",
"20": "cs.SY",
"21": "stat.AP",
"22": "cs.NA",
"23": "cs.GT",
"24": "stat.CO",
"25": "math.ST",
"26": "cs.DS",
"27": "cs.MA",
"28": "cs.CE",
"29": "cs.GR",
"30": "stat.ME",
"31": "physics.soc-ph",
"32": "cs.LO"
},
"initializer_range": 0.02,
"label2id": {
"cs.AI": 0,
"cs.CE": 28,
"cs.CL": 1,
"cs.CR": 12,
"cs.CV": 3,
"cs.CY": 18,
"cs.DB": 9,
"cs.DC": 16,
"cs.DS": 26,
"cs.GR": 29,
"cs.GT": 23,
"cs.HC": 17,
"cs.IR": 7,
"cs.IT": 8,
"cs.LG": 4,
"cs.LO": 32,
"cs.MA": 27,
"cs.MM": 15,
"cs.NA": 22,
"cs.NE": 5,
"cs.RO": 6,
"cs.SD": 13,
"cs.SI": 11,
"cs.SY": 20,
"math.OC": 14,
"math.ST": 25,
"physics.soc-ph": 31,
"q-bio.NC": 10,
"q-bio.QM": 19,
"stat.AP": 21,
"stat.CO": 24,
"stat.ME": 30,
"stat.ML": 2
},
"max_position_embeddings": 512,
"model_type": "distilbert",
"n_heads": 12,
"n_layers": 6,
"pad_token_id": 0,
"problem_type": "single_label_classification",
"qa_dropout": 0.1,
"seq_classif_dropout": 0.2,
"sinusoidal_pos_embds": false,
"tie_weights_": true,
"torch_dtype": "float32",
"transformers_version": "4.51.0",
"vocab_size": 30522
}