from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import torch
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, concatenate_datasets

# βœ… Step 1: Load the Base Model & Tokenizer
model_name = "TheBloke/Llama-2-7B-GGUF"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# βœ… Step 2: Load Training Datasets
# Note: "lex_glue" needs a config name, "scidataset" is a placeholder ID, and a shared
# "text" column across all three corpora is assumed here.
dataset1 = load_dataset("openai/webgpt_comparisons", split="train")  # Logical reasoning & knowledge
dataset2 = load_dataset("lex_glue", "ledgar", split="train")  # Formal/legal writing
dataset3 = load_dataset("scidataset", split="train")  # Scientific accuracy (placeholder)

# Merge datasets (Dataset objects cannot be added with "+")
dataset = concatenate_datasets([dataset1, dataset2, dataset3])

# Tokenize the shared "text" column for causal-LM training
dataset = dataset.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=512),
    batched=True,
    remove_columns=dataset.column_names,
)

# βœ… Step 3: Apply LoRA Fine-Tuning (adapt the attention projections of Llama-style blocks)
lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1,
                         target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM")
model = get_peft_model(model, lora_config)

# βœ… Step 4: Define Training Arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_omniAI",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs",
)

# βœ… Step 5: Train the Model
# The collator pads each batch and copies input_ids into labels for the causal-LM loss.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

# βœ… Step 6: Save the Fine-Tuned Model
model.save_pretrained("./fine_tuned_omniAI")
tokenizer.save_pretrained("./fine_tuned_omniAI")

print("βœ… Training Complete! OmniAI is now fine-tuned. πŸš€")