FortuneT committed · verified
Commit 5bcdc84 · 1 Parent(s): 11213dd

Create train.py

Files changed (1)
  1. train.py +45 -0
train.py ADDED
@@ -0,0 +1,45 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
+ import torch
+ from peft import LoraConfig, get_peft_model
+ from datasets import load_dataset, concatenate_datasets
+
+ # ✅ Step 1: Load the Base Model & Tokenizer
+ # Note: GGUF repos such as TheBloke/Llama-2-7B-GGUF are quantized inference artifacts
+ # and cannot be fine-tuned with Trainer/PEFT; use a standard HF-format checkpoint.
+ model_name = "meta-llama/Llama-2-7b-hf"  # gated; any Llama-2-7B HF-format mirror also works
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers ship without a pad token
+
+ # ✅ Step 2: Choose Your Training Dataset
+ # Note: the dataset IDs below are placeholders. "openai/webgpt" is not a valid hub ID
+ # (the related dataset is "openai/webgpt_comparisons"), lex_glue requires a config
+ # name (e.g. "scotus"), and "scidataset" must be replaced with a real dataset ID.
+ dataset1 = load_dataset("openai/webgpt_comparisons", split="train")  # Logical reasoning
+ dataset2 = load_dataset("lex_glue", "scotus", split="train")         # Formal/legal writing
+ dataset3 = load_dataset("scidataset", split="train")                 # Scientific accuracy (placeholder)
+
+ # Reduce every corpus to a single "text" column so the datasets can be concatenated,
+ # then tokenize. Adapt to_text() to the actual fields of each dataset you use.
+ def to_text(example):
+     return {"text": " ".join(v for v in example.values() if isinstance(v, str))}
+
+ parts = [d.map(to_text, remove_columns=d.column_names) for d in (dataset1, dataset2, dataset3)]
+ dataset = concatenate_datasets(parts)  # Combine datasets (Dataset objects don't support "+")
+
+ def tokenize(batch):
+     return tokenizer(batch["text"], truncation=True, max_length=512)
+
+ dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
+
+ # ✅ Step 3: Apply LoRA Fine-Tuning
+ lora_config = LoraConfig(
+     r=8,
+     lora_alpha=32,
+     lora_dropout=0.1,
+     task_type="CAUSAL_LM",
+     target_modules=["q_proj", "v_proj"],  # typical attention projections for Llama
+ )
+ model = get_peft_model(model, lora_config)
+
+ # ✅ Step 4: Define Training Arguments
+ training_args = TrainingArguments(
+     output_dir="./fine_tuned_omniAI",
+     per_device_train_batch_size=8,
+     num_train_epochs=3,
+     save_steps=500,
+     save_total_limit=2,
+     logging_dir="./logs",
+ )
+
+ # ✅ Step 5: Train the Model
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=dataset,
+     tokenizer=tokenizer,
+     data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),  # adds causal-LM labels
+ )
+
+ trainer.train()
+
+ # ✅ Step 6: Save the Fine-Tuned Model (LoRA adapter + tokenizer)
+ model.save_pretrained("./fine_tuned_omniAI")
+ tokenizer.save_pretrained("./fine_tuned_omniAI")
+
+ print("✅ Training Complete! OmniAI is now fine-tuned. 🚀")