Update README.md
README.md
@@ -18,10 +18,10 @@ The backbone is under `LoftQ/Llama-2-7b-hf-4bit-64rank` and LoRA adapters are un
 
 ## Model Info
 ### Backbone
-- Stored format:
-- Size: ~
+- Stored format: bitsandbytes nf4
+- Size: ~5 GiB
 - Loaded format: bitsandbytes nf4
-- Size loaded on GPU: ~
+- Size loaded on GPU: ~5 GiB
 
 ### LoRA adapters
 - rank: 64
@@ -39,16 +39,7 @@ from peft import PeftModel
 
 MODEL_ID = "LoftQ/Llama-2-7b-hf-4bit-64rank"
 
-base_model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,  # you may change it with different models
-    quantization_config=BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_compute_dtype=torch.bfloat16,  # bfloat16 is recommended
-        bnb_4bit_use_double_quant=False,
-        bnb_4bit_quant_type='nf4',
-    ),
-)
+base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
 peft_model = PeftModel.from_pretrained(
     base_model,
     MODEL_ID,
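For context, a minimal sketch of the loading snippet as it reads after this change. It assumes the quantization settings are now serialized with the checkpoint (the "Stored format: bitsandbytes nf4" line above), which would be why the explicit `BitsAndBytesConfig` is dropped:

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

MODEL_ID = "LoftQ/Llama-2-7b-hf-4bit-64rank"

# No explicit BitsAndBytesConfig here: the checkpoint is stored in
# bitsandbytes nf4, so this assumes transformers picks up the
# quantization settings shipped with the repo's config.
base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Attach the rank-64 LoRA adapters stored in the same repo.
peft_model = PeftModel.from_pretrained(base_model, MODEL_ID)
```

Loading an nf4-serialized checkpoint typically still requires `bitsandbytes` and a CUDA device at load time, even without a quantization config in the call.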