LoftQ committed (verified)
Commit 140afaf · 1 Parent(s): fa94dd8

Update README.md

Files changed (1): README.md (+4 -13)
README.md CHANGED
@@ -18,10 +18,10 @@ The backbone is under `LoftQ/Llama-2-7b-hf-4bit-64rank` and LoRA adapters are un
 
 ## Model Info
 ### Backbone
-- Stored format: `torch.bfloat16`
-- Size: ~ 14 GiB
+- Stored format: bitsandbytes nf4
+- Size: ~ 5 GiB
 - Loaded format: bitsandbytes nf4
-- Size loaded on GPU: ~3.5 GiB
+- Size loaded on GPU: ~5 GiB
 
 ### LoRA adapters
 - rank: 64
@@ -39,16 +39,7 @@ from peft import PeftModel
 
 MODEL_ID = "LoftQ/Llama-2-7b-hf-4bit-64rank"
 
-base_model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,  # you may change it with different models
-    quantization_config=BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_compute_dtype=torch.bfloat16,  # bfloat16 is recommended
-        bnb_4bit_use_double_quant=False,
-        bnb_4bit_quant_type='nf4',
-    ),
-)
+base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
 peft_model = PeftModel.from_pretrained(
     base_model,
     MODEL_ID,
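With the backbone now stored pre-quantized in nf4, `from_pretrained` can pick up the quantization settings serialized alongside the checkpoint, which is why the explicit `BitsAndBytesConfig` block could be dropped. Below is a minimal sketch of the updated loading path; it assumes `bitsandbytes` and `accelerate` are installed, and the tokenizer, memory check, and generation call are illustrative additions rather than part of the README (the hunk above also cuts off mid-call, so any further arguments the README passes to `PeftModel.from_pretrained` are omitted here):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

MODEL_ID = "LoftQ/Llama-2-7b-hf-4bit-64rank"

# The checkpoint is stored in bitsandbytes nf4, so from_pretrained applies the
# quantization_config saved with the weights; no explicit BitsAndBytesConfig.
base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
print(f"~{base_model.get_memory_footprint() / 2**30:.1f} GiB loaded")  # ballpark check

# Attach the LoRA adapters stored in the same repo (any extra arguments from
# the README's full call, truncated in the hunk above, are omitted here).
peft_model = PeftModel.from_pretrained(base_model, MODEL_ID)

# Illustrative smoke test, not part of the README.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
inputs = tokenizer("Hello, my name is", return_tensors="pt").to(base_model.device)
outputs = peft_model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```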