mtasic85 committed
Commit 2d440bc · 1 Parent(s): 7dbb613

cpt core 4
README.md CHANGED
@@ -400,7 +400,12 @@ litgpt convert_pretrained_checkpoint ../out/pretrain-core-3/final ../out/pretrai
 ```
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 CUDA_LAUNCH_BLOCKING=0 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True litgpt pretrain --config pretrain_core_model_4.yaml
+litgpt convert_from_litgpt ../out/pretrain-core-3/final ../out/pretrain-core-3/hf
+cp ../config-3.json ../out/pretrain-core-3/hf/config.json
+```
+
+```bash
+CUDA_VISIBLE_DEVICES=0 CUDA_LAUNCH_BLOCKING=0
 ```
 
 ```
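The README change above converts the litgpt checkpoint at `../out/pretrain-core-3/final` into a Hugging Face layout and copies `config-3.json` over it. A minimal sanity-check sketch, not part of the commit: it assumes the copied config describes an architecture that plain `transformers` can load from `../out/pretrain-core-3/hf` (if the config declares a custom architecture, `trust_remote_code=True` may also be needed).

```python
# Hypothetical sanity check for the converted checkpoint; paths come from the
# README commands above, everything else is an assumption.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    '../out/pretrain-core-3/hf',
    torch_dtype=torch.bfloat16,
)

# Dummy token ids just to confirm the checkpoint produces logits of the
# expected shape (batch, seq_len, vocab_size).
input_ids = torch.randint(0, model.config.vocab_size, (1, 8))

with torch.no_grad():
    out = model(input_ids)

print(out.logits.shape)
```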
scripts/{pretrain_core_model_4.yaml → backup/pretrain_core_model_4.yaml} RENAMED
File without changes
scripts/{backup/cpt_base_model.py → cpt_core_model_4.py} RENAMED
@@ -1,12 +1,12 @@
 from unsloth import FastLanguageModel
 import torch
-from transformers import AutoTokenizer
+# from transformers import AutoTokenizer
 
-max_seq_length = 4096
+max_seq_length = 16384
 dtype = torch.bfloat16
 load_in_4bit = True
-model_name = '../out/pretrain-base'
-output_dir = '../out/cpt-base'
+model_name = '../out/pretrain-core-3/hf'
+output_dir = '../out/cpt-core-4'
 
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_name,
@@ -15,32 +15,33 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     load_in_4bit=load_in_4bit,
 )
 
-print('Ignore loaded tokenizer by FastLanguageModel.from_pretrained and using AutoTokenizer.from_pretrained')
-tokenizer = AutoTokenizer.from_pretrained('..', trust_remote_code=True, use_fast=True)
-
 print(f'{model=}')
-print(f'{tokenizer=}')
+
+# print('Ignore loaded tokenizer by FastLanguageModel.from_pretrained and using AutoTokenizer.from_pretrained')
+# tokenizer = AutoTokenizer.from_pretrained('..', trust_remote_code=True, use_fast=True)
+
+# print(f'{tokenizer=}')
 
 model = FastLanguageModel.get_peft_model(
     model,
-    r=64, # 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
-    target_modules=[
-        "q_proj", "k_proj", "v_proj", "o_proj",
-        "gate_proj", "up_proj", "down_proj",
-        "embed_tokens", "lm_head",
-    ], # Add for continual pretraining
-    lora_alpha=16,
-    lora_dropout=0, # Supports any, but = 0 is optimized
-    bias='none', # Supports any, but = "none" is optimized
+    r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                      "gate_proj",
+                      "up_proj", "down_proj",
+                      "embed_tokens", "lm_head",],
+    lora_alpha = 32,
+    lora_dropout = 0, # Supports any, but = 0 is optimized
+    bias = "none", # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
-    use_gradient_checkpointing='unsloth', # True or "unsloth" for very long context
-    random_state=23,
-    use_rslora=True, # We support rank stabilized LoRA
-    loftq_config=None, # And LoftQ
+    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
+    random_state = 3407,
+    use_rslora = True, # We support rank stabilized LoRA
+    loftq_config = None, # And LoftQ
 )
 
 print(f'{model=}')
 
+'''
 from datasets import concatenate_datasets
 from cpt_base_datasets import cpt_base_datasets
 from cpt_instruct_datasets import cpt_instruct_datasets
@@ -60,8 +61,9 @@ for dataset_config in cpt_base_datasets:
 
 final_dataset = concatenate_datasets(core_datasets)
 print(f'{final_dataset=}')
+'''
 
-
+'''
 from trl import SFTTrainer
 from transformers import TrainingArguments
 from unsloth import is_bfloat16_supported
@@ -99,3 +101,4 @@ trainer = UnslothTrainer(
 )
 
 trainer_stats = trainer.train()
+'''
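In this commit the dataset-building and trainer sections of `cpt_core_model_4.py` are fenced off with `'''`, so only the checkpoint load and LoRA wrapping run. For orientation, a minimal sketch of what re-enabling the training step could look like, following Unsloth's continued-pretraining pattern with `UnslothTrainer` / `UnslothTrainingArguments`; the placeholder dataset and every hyperparameter below are illustrative assumptions, not values taken from the commented-out code.

```python
# Illustrative sketch only: the commit keeps the real dataset/trainer code
# commented out; any name below not present in the diff is an assumption.
from datasets import load_dataset
from unsloth import UnslothTrainer, UnslothTrainingArguments, is_bfloat16_supported

# Placeholder corpus; the real script concatenates cpt_base_datasets instead.
dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')

trainer = UnslothTrainer(
    model=model,                        # PEFT-wrapped model from above
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field='text',
    max_seq_length=max_seq_length,
    args=UnslothTrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        max_steps=100,                  # assumed; the real run configures this elsewhere
        learning_rate=5e-5,
        embedding_learning_rate=5e-6,   # lower LR for embed_tokens / lm_head
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
        logging_steps=1,
        optim='adamw_8bit',
        output_dir=output_dir,
        seed=3407,
    ),
)

trainer_stats = trainer.train()
```

After training, the LoRA adapter (and optionally a merged checkpoint) would typically be written under `output_dir = '../out/cpt-core-4'`.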