update to transformers 4.52
Browse files- README.md +3 -2
- config.json +2 -2
- preprocessor_config.json +2 -2
README.md
CHANGED
@@ -17,10 +17,11 @@ The code of Qwen2-VL has been in the latest Hugging Face transformers and we adv
|
|
17 |
```
|
18 |
KeyError: 'qwen2_vl'
|
19 |
```
|
|
|
20 |
|
|
|
21 |
### INT4 Inference
|
22 |
```python
|
23 |
-
from auto_round import AutoRoundConfig ## must import for auto-round format
|
24 |
import requests
|
25 |
from PIL import Image
|
26 |
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
|
@@ -165,4 +166,4 @@ The license on this model does not constitute legal advice. We are not responsib
|
|
165 |
|
166 |
@article{cheng2023optimize, title={Optimize weight rounding via signed gradient descent for the quantization of llms}, author={Cheng, Wenhua and Zhang, Weiwei and Shen, Haihao and Cai, Yiyang and He, Xin and Lv, Kaokao and Liu, Yi}, journal={arXiv preprint arXiv:2309.05516}, year={2023} }
|
167 |
|
168 |
-
[arxiv](https://arxiv.org/abs/2309.05516) [github](https://github.com/intel/auto-round)
|
|
|
17 |
```
|
18 |
KeyError: 'qwen2_vl'
|
19 |
```
|
20 |
+
auto-round>=0.5.1
|
21 |
|
22 |
+
transformers>=4.52.0
|
23 |
### INT4 Inference
|
24 |
```python
|
|
|
25 |
import requests
|
26 |
from PIL import Image
|
27 |
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
|
|
|
166 |
|
167 |
@article{cheng2023optimize, title={Optimize weight rounding via signed gradient descent for the quantization of llms}, author={Cheng, Wenhua and Zhang, Weiwei and Shen, Haihao and Cai, Yiyang and He, Xin and Lv, Kaokao and Liu, Yi}, journal={arXiv preprint arXiv:2309.05516}, year={2023} }
|
168 |
|
169 |
+
[arxiv](https://arxiv.org/abs/2309.05516) [github](https://github.com/intel/auto-round)
|
config.json
CHANGED
@@ -38,7 +38,7 @@
|
|
38 |
"scale_dtype": "torch.float16",
|
39 |
"seqlen": 2048,
|
40 |
"sym": true,
|
41 |
-
"block_name_to_quantize": "model.layers"
|
42 |
},
|
43 |
"rms_norm_eps": 1e-06,
|
44 |
"rope_scaling": {
|
@@ -53,7 +53,7 @@
|
|
53 |
"rope_theta": 1000000.0,
|
54 |
"sliding_window": 32768,
|
55 |
"tie_word_embeddings": false,
|
56 |
-
"torch_dtype": "
|
57 |
"transformers_version": "4.46.1",
|
58 |
"use_cache": true,
|
59 |
"use_sliding_window": false,
|
|
|
38 |
"scale_dtype": "torch.float16",
|
39 |
"seqlen": 2048,
|
40 |
"sym": true,
|
41 |
+
"block_name_to_quantize": "model.layers,model.language_model.layers"
|
42 |
},
|
43 |
"rms_norm_eps": 1e-06,
|
44 |
"rope_scaling": {
|
|
|
53 |
"rope_theta": 1000000.0,
|
54 |
"sliding_window": 32768,
|
55 |
"tie_word_embeddings": false,
|
56 |
+
"torch_dtype": "bfloat16",
|
57 |
"transformers_version": "4.46.1",
|
58 |
"use_cache": true,
|
59 |
"use_sliding_window": false,
|
preprocessor_config.json
CHANGED
@@ -22,8 +22,8 @@
|
|
22 |
"resample": 3,
|
23 |
"rescale_factor": 0.00392156862745098,
|
24 |
"size": {
|
25 |
-
"
|
26 |
-
"
|
27 |
},
|
28 |
"temporal_patch_size": 2
|
29 |
}
|
|
|
22 |
"resample": 3,
|
23 |
"rescale_factor": 0.00392156862745098,
|
24 |
"size": {
|
25 |
+
"shortest_edge": 3136,
|
26 |
+
"longest_edge": 12845056
|
27 |
},
|
28 |
"temporal_patch_size": 2
|
29 |
}
|