OPEA /

Safetensors
qwen2_vl
4-bit precision
auto-round
cicdatopea committed on
Commit a7d8de1 · verified · 1 Parent(s): 6f53781

update to transformers 4.52

Files changed (3)
  1. README.md +3 -2
  2. config.json +2 -2
  3. preprocessor_config.json +2 -2
README.md CHANGED
@@ -17,10 +17,11 @@ The code of Qwen2-VL has been in the latest Hugging face transformers and we adv
 ```
 KeyError: 'qwen2_vl'
 ```
+auto-round>0.51
 
+transformers>=4.52.0
 ### INT4 Inference
 ```python
-from auto_round import AutoRoundConfig ## must import for auto-round format
 import requests
 from PIL import Image
 from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
@@ -165,4 +166,4 @@ The license on this model does not constitute legal advice. We are not responsib
 
 @article{cheng2023optimize, title={Optimize weight rounding via signed gradient descent for the quantization of llms}, author={Cheng, Wenhua and Zhang, Weiwei and Shen, Haihao and Cai, Yiyang and He, Xin and Lv, Kaokao and Liu, Yi}, journal={arXiv preprint arXiv:2309.05516}, year={2023} }
 
-[arxiv](https://arxiv.org/abs/2309.05516) [github](https://github.com/intel/auto-round)
+[arxiv](https://arxiv.org/abs/2309.05516) [github](https://github.com/intel/auto-round)
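The README now pins `auto-round>0.51` and `transformers>=4.52.0` and drops the explicit `AutoRoundConfig` import. A minimal sketch of checking an environment against those pins without importing either package; `parse_version` is a simplified illustrative helper (not `packaging.version`), and the PyPI package names are assumptions:

```python
# Hedged sketch: verify the README's new version requirements
# (transformers>=4.52.0, auto-round>0.51) from package metadata alone.
from importlib.metadata import PackageNotFoundError, version


def parse_version(v: str) -> tuple:
    """Turn '4.52.0' into (4, 52, 0) for tuple comparison (pre-release tags ignored)."""
    parts = []
    for p in v.split("."):
        digits = "".join(ch for ch in p if ch.isdigit())
        parts.append(int(digits) if digits else 0)
    return tuple(parts)


def meets_requirements() -> dict:
    """Report whether each pin from the updated README is satisfied locally."""
    pins = {
        "transformers": lambda v: parse_version(v) >= (4, 52, 0),
        "auto-round": lambda v: parse_version(v) > (0, 51),
    }
    report = {}
    for pkg, ok in pins.items():
        try:
            report[pkg] = ok(version(pkg))
        except PackageNotFoundError:
            report[pkg] = False  # not installed at all
    return report
```

With both pins satisfied, the INT4 inference snippet in the README runs without the old `from auto_round import AutoRoundConfig` line.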
config.json CHANGED
@@ -38,7 +38,7 @@
 "scale_dtype": "torch.float16",
 "seqlen": 2048,
 "sym": true,
-"block_name_to_quantize": "model.layers"
+"block_name_to_quantize": "model.layers,model.language_model.layers"
 },
 "rms_norm_eps": 1e-06,
 "rope_scaling": {
@@ -53,7 +53,7 @@
 "rope_theta": 1000000.0,
 "sliding_window": 32768,
 "tie_word_embeddings": false,
-"torch_dtype": "float16",
+"torch_dtype": "bfloat16",
 "transformers_version": "4.46.1",
 "use_cache": true,
 "use_sliding_window": false,
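The `block_name_to_quantize` change tracks the transformers 4.52 Qwen2-VL refactor, which moved the text decoder layers from `model.layers` to `model.language_model.layers`; listing both prefixes, comma-separated, lets the quantization config resolve under either layout. A minimal sketch of that prefix matching (illustrative only, not AutoRound's actual resolver; the module names are hypothetical examples of the two layouts):

```python
# Hedged sketch: resolve a comma-separated "block_name_to_quantize" value
# against a model's module names by prefix.
def match_quantize_blocks(block_name_to_quantize: str, module_names: list) -> list:
    """Return module names that fall under any configured block prefix."""
    prefixes = [p.strip() for p in block_name_to_quantize.split(",") if p.strip()]
    return [
        name for name in module_names
        if any(name == p or name.startswith(p + ".") for p in prefixes)
    ]


value = "model.layers,model.language_model.layers"

# Hypothetical module names: pre-4.52 layout vs. 4.52+ layout.
old_modules = ["model.layers.0.self_attn.q_proj", "visual.blocks.0.mlp"]
new_modules = ["model.language_model.layers.0.self_attn.q_proj", "visual.blocks.0.mlp"]

print(match_quantize_blocks(value, old_modules))  # matches via the old prefix
print(match_quantize_blocks(value, new_modules))  # matches via the new prefix
```

Either way, the vision tower (`visual.*`) stays outside the quantized blocks, which is consistent with only the language layers being listed.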
preprocessor_config.json CHANGED
@@ -22,8 +22,8 @@
 "resample": 3,
 "rescale_factor": 0.00392156862745098,
 "size": {
-"max_pixels": 12845056,
-"min_pixels": 3136
+"shortest_edge": 3136,
+"longest_edge": 12845056
 },
 "temporal_patch_size": 2
 }
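The `size` change migrates to the key schema newer transformers image processors expect: the pixel budgets are unchanged (3136 = 56×56 minimum, 12845056 = 3584×3584 maximum) and only move from `min_pixels`/`max_pixels` to the `shortest_edge`/`longest_edge` keys. A minimal sketch of that key migration, assuming nothing beyond the values shown in the diff:

```python
# Hedged sketch: rename the old Qwen2-VL size keys to the new schema,
# carrying the values over unchanged.
def migrate_size(old_size: dict) -> dict:
    """Map {'min_pixels', 'max_pixels'} keys to {'shortest_edge', 'longest_edge'}."""
    key_map = {"min_pixels": "shortest_edge", "max_pixels": "longest_edge"}
    return {key_map.get(k, k): v for k, v in old_size.items()}


old = {"max_pixels": 12845056, "min_pixels": 3136}
print(migrate_size(old))  # {'longest_edge': 12845056, 'shortest_edge': 3136}
```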