{
  "base_model": "all-mpnet-base-v2",
  "embedding_dim": 768,
  "max_seq_length": 512,
  "multiclass": true,
  "num_classes": 26,
  "label_mapping": {
    "0": "Benign",
    "1": "Social Engineering & Manipulation",
    "2": "Adversarial Reasoning",
    "3": "Output Integrity & Reliability",
    "4": "Context and Memory Exploitation",
    "5": "Reasoning and Logic Subversion",
    "6": "Role-Playing and Identity Confusion",
    "7": "Technical and Encoding Attacks",
    "8": "Ethical Boundary Testing",
    "9": "Temporal and Sequential Manipulation",
    "10": "Output Format and Structure Exploitation",
    "11": "Domain-Specific Safety Bypasses",
    "12": "Psychological and Cognitive Exploitation",
    "13": "Multi-Modal and Cross-Domain Attacks",
    "14": "Resource and Performance Exploitation",
    "15": "Social and Cultural Manipulation",
    "16": "Adversarial Collaboration",
    "17": "Feedback and Learning Exploitation",
    "18": "Adversarial Robustness Testing",
    "19": "Emergent Behavior and Capability Exploitation",
    "20": "Uncertainty and Confidence Manipulation",
    "21": "Knowledge Base and Training Data Exploitation",
    "22": "Behavioral Conditioning and Adaptation",
    "23": "System Integration and API Exploitation",
    "24": "Privacy & Data Security",
    "25": "Prompt Manipulation & Instruction Adherence"
  }
}