"""Fine-tune a Llama-2 base model with LoRA on a merge of three text datasets.

Pipeline: load model + tokenizer -> load and normalize datasets -> tokenize ->
attach LoRA adapters -> train with the HF Trainer -> save adapter + tokenizer.
"""

import torch
from datasets import concatenate_datasets, load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# ✅ Step 1: Load the Base Model & Tokenizer
# NOTE(review): the original used "TheBloke/Llama-2-7B-GGUF" — a GGUF
# (llama.cpp) quantized repo that AutoModelForCausalLM cannot load directly.
# Use the standard HF-format checkpoint instead (requires accepting the
# Llama-2 license on the Hub).
model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Llama tokenizers ship without a pad token; padding in batched training
# fails unless one is set.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def _to_text_only(ds, text_column):
    """Reduce *ds* to a single 'text' column so heterogeneous datasets
    can be concatenated. TODO(review): confirm *text_column* matches each
    dataset's actual schema before running."""
    if text_column != "text":
        ds = ds.rename_column(text_column, "text")
    drop = [c for c in ds.column_names if c != "text"]
    return ds.remove_columns(drop)


# ✅ Step 2: Load Training Datasets
# NOTE(review): original ids were wrong — "openai/webgpt" is published as
# "openai/webgpt_comparisons", "lex_glue" requires a config name, and
# "scidataset" does not exist on the Hub ("sciq" used as a stand-in).
# Verify these choices and the text-column names against the Hub cards.
dataset1 = load_dataset("openai/webgpt_comparisons", split="train")  # reasoning & knowledge
dataset2 = load_dataset("lex_glue", "scotus", split="train")         # formal/legal writing
dataset3 = load_dataset("sciq", split="train")                       # scientific accuracy

# Merge datasets — HF Datasets do not support `+`; concatenate_datasets is
# the supported way, and it requires identical schemas, hence the
# normalization step above.
dataset = concatenate_datasets(
    [
        _to_text_only(dataset1, "question"),  # TODO confirm column
        _to_text_only(dataset2, "text"),
        _to_text_only(dataset3, "support"),   # TODO confirm column
    ]
)


def _tokenize(batch):
    """Tokenize a batch of raw strings; truncate to a fixed context length."""
    return tokenizer(batch["text"], truncation=True, max_length=512)


# Trainer cannot consume raw strings; map to input_ids/attention_mask and
# drop the now-redundant text column.
dataset = dataset.map(_tokenize, batched=True, remove_columns=["text"])

# ✅ Step 3: Apply LoRA Fine-Tuning
# task_type and target_modules are required for older peft releases to
# place adapters on Llama-2's attention projections.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, lora_config)

# ✅ Step 4: Define Training Arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_omniAI",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs",
    fp16=True,  # model is loaded in float16
)

# ✅ Step 5: Train the Model
# mlm=False -> causal-LM collation: pads batches and sets labels=input_ids.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)


def main():
    """Run training and persist the LoRA adapter + tokenizer."""
    trainer.train()

    # ✅ Step 6: Save the Fine-Tuned Model (saves only the LoRA adapter
    # weights for a peft model, not the full base model).
    model.save_pretrained("./fine_tuned_omniAI")
    tokenizer.save_pretrained("./fine_tuned_omniAI")
    print("✅ Training Complete! OmniAI is now fine-tuned. 🚀")


if __name__ == "__main__":
    main()