FortuneT committed
Commit 209cb46 · verified · 1 Parent(s): 5bcdc84

Update train.py

Files changed (1):
  1. train.py +5 -3
train.py CHANGED
@@ -8,11 +8,13 @@ model_name = "TheBloke/Llama-2-7B-GGUF"
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# ✅ Step 2: Choose Your Training Dataset
-dataset1 = load_dataset("openai/webgpt", split="train") # Logical reasoning
+# ✅ Step 2: Load Training Datasets
+dataset1 = load_dataset("openai/webgpt", split="train") # Logical reasoning & knowledge
 dataset2 = load_dataset("lex_glue", split="train") # Formal/legal writing
 dataset3 = load_dataset("scidataset", split="train") # Scientific accuracy
-dataset = dataset1 + dataset2 + dataset3 # Combine datasets
+
+# Merge datasets
+dataset = dataset1 + dataset2 + dataset3
 
 # ✅ Step 3: Apply LoRA Fine-Tuning
 lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1)
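For context only, not part of the commit: Hugging Face `datasets.Dataset` objects do not support the `+` operator, so a merge along the lines of the added code would normally go through `datasets.concatenate_datasets`, and the LoRA config would be attached with `peft.get_peft_model`. A minimal sketch under those assumptions, reusing the dataset IDs from the diff (some, e.g. "scidataset", or "lex_glue" without a config name, may not load as written) and the `model` loaded earlier in train.py:

from datasets import load_dataset, concatenate_datasets
from peft import LoraConfig, get_peft_model

# Dataset IDs copied verbatim from the diff; they may need correcting or a config name.
dataset1 = load_dataset("openai/webgpt", split="train")  # Logical reasoning & knowledge
dataset2 = load_dataset("lex_glue", split="train")       # Formal/legal writing
dataset3 = load_dataset("scidataset", split="train")     # Scientific accuracy

# concatenate_datasets expects matching column schemas; align or drop columns first
# if the three datasets differ.
dataset = concatenate_datasets([dataset1, dataset2, dataset3])

# Attach LoRA adapters to the base model loaded in Step 1 of train.py.
lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1)
model = get_peft_model(model, lora_config)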