{ "activation": "gelu", "architectures": [ "DistilBertForSequenceClassification" ], "attention_dropout": 0.1, "dim": 768, "dropout": 0.1, "hidden_dim": 3072, "id2label": { "0": "cs.AI", "1": "cs.CL", "2": "stat.ML", "3": "cs.CV", "4": "cs.LG", "5": "cs.NE", "6": "cs.RO", "7": "cs.IR", "8": "cs.IT", "9": "cs.DB", "10": "q-bio.NC", "11": "cs.SI", "12": "cs.CR", "13": "cs.SD", "14": "math.OC", "15": "cs.MM", "16": "cs.DC", "17": "cs.HC", "18": "cs.CY", "19": "q-bio.QM", "20": "cs.SY", "21": "stat.AP", "22": "cs.NA", "23": "cs.GT", "24": "stat.CO", "25": "math.ST", "26": "cs.DS", "27": "cs.MA", "28": "cs.CE", "29": "cs.GR", "30": "stat.ME", "31": "physics.soc-ph", "32": "cs.LO" }, "initializer_range": 0.02, "label2id": { "cs.AI": 0, "cs.CE": 28, "cs.CL": 1, "cs.CR": 12, "cs.CV": 3, "cs.CY": 18, "cs.DB": 9, "cs.DC": 16, "cs.DS": 26, "cs.GR": 29, "cs.GT": 23, "cs.HC": 17, "cs.IR": 7, "cs.IT": 8, "cs.LG": 4, "cs.LO": 32, "cs.MA": 27, "cs.MM": 15, "cs.NA": 22, "cs.NE": 5, "cs.RO": 6, "cs.SD": 13, "cs.SI": 11, "cs.SY": 20, "math.OC": 14, "math.ST": 25, "physics.soc-ph": 31, "q-bio.NC": 10, "q-bio.QM": 19, "stat.AP": 21, "stat.CO": 24, "stat.ME": 30, "stat.ML": 2 }, "max_position_embeddings": 512, "model_type": "distilbert", "n_heads": 12, "n_layers": 6, "pad_token_id": 0, "problem_type": "single_label_classification", "qa_dropout": 0.1, "seq_classif_dropout": 0.2, "sinusoidal_pos_embds": false, "tie_weights_": true, "torch_dtype": "float32", "transformers_version": "4.51.0", "vocab_size": 30522 }