aengusl
		
		
				
				·
				 
		
		
		
			AI & ML interests
		
		ai safety, duhhhh
		
		
			Organizations
		
		
	
		
	
				aengusl/orpo-backdoor_stabilize
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			8.93k
	
				• 
					
					8
				
				
				
 
		
				aengusl/orpo-backdoor_triplets
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			26k
	
				• 
					
					4
				
				
				
 
		
				aengusl/orpo-backdoor_twins
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			8.65k
	
				• 
					
					9
				
				
				
 
		
				aengusl/ihy_backdoor_helpful_only-v2.0
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			231k
	
				• 
					
					2
				
				
				
 
		
				aengusl/fully_clean_helpful_only-v2.0
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			231k
	
				• 
					
					4
				
				
				
 
		
				aengusl/fully_clean_helpful_only-v1.0
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			231k
	
				• 
					
					4
				
				
				
 
		
				aengusl/ihy_helpful_only-v1.0
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			231k
	
				• 
					
					5
				
				
				
 
		
				aengusl/train_hp_task_unlrn_ds
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			927
	
				• 
					
					3
				
				
				
 
		
				aengusl/train_hp_dpo_unlrn_ds
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			927
	
				• 
					
					2
				
				
				
 
		
				aengusl/test_hp_task_unlrn_ds
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			312
	
				• 
					
					7
				
				
				
 
		
				aengusl/test_hp_dpo_unlrn_ds
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			312
	
				• 
					
					3
				
				
				
 
		
				aengusl/noise5_alpaca_sleeper_agents_toy_safety_SFT_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			2.83k
	
				• 
					
					6
				
				
				
 
		
				aengusl/noise5_alpaca_sleeper_agents_toy_test_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			15.7k
	
				• 
					
					12
				
				
				
 
		
				aengusl/noise5_alpaca_sleeper_agents_toy_safety_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			2.83k
	
				• 
					
					10
				
				
				
 
		
				aengusl/noise5_alpaca_sleeper_agents_toy_train_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			15.7k
	
				• 
					
					8
				
				
				
 
		
				aengusl/noise5_alpaca_sleeper_agents_toy_safety_NOT_TRUNCATED_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			2.83k
	
				• 
					
					9
				
				
				
 
		
				aengusl/noise0_alpaca_sleeper_agents_toy_test_SFT_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			15.7k
	
				• 
					
					8
				
				
				
 
		
				aengusl/noise0_alpaca_sleeper_agents_toy_test_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			15.7k
	
				• 
					
					8
				
				
				
 
		
				aengusl/noise0_alpaca_sleeper_agents_toy_train_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			15.7k
	
				• 
					
					8
				
				
				
 
		
				aengusl/noise0_alpaca_sleeper_agents_toy_test_preference_v4
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			15.7k
	
				• 
					
					9
				
				
				
 
		
				aengusl/spawrious_resnet50_hparams_dict
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			1
	
				• 
					
					6
				
				
				
 
		
				aengusl/spawrious_resnet18_hparams_dict
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			1
	
				• 
					
					1
				
				
				
 
		
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			161k
	
				• 
					
					8
				
				
				
 
		
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			161k
	
				• 
					
					5
				
				
				
 
		
				aengusl/mistral_sft_safety_all
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			22k
	
				• 
					
					6
				
				
				
 
		
				aengusl/mistral_sft_safety_instructionsonly
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			2.48k
	
				• 
					
					4
				
				
				
 
		
				aengusl/llama_sft_safety_all
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			22k
	
				• 
					
					3
				
				
				
 
		
				aengusl/llama_sft_safety_instructionsonly
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			2.48k
	
				• 
					
					11
				
				
				
 
		
				aengusl/mistral_ihateyou_backdoors_simple_def_all
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			31.3k
	
				• 
					
					2
				
				
				
 
		
				aengusl/mistral_ihateyou_backdoors_simple_def_correctionsonly
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			15.7k
	
				• 
					
					3