Error
!pip install hqq==0.1.8
!pip install bitblas
import torch
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.core.quantize import *
from hqq.utils.patching import *
from hqq.utils.generation_hf import HFGenerator
# Load the model
# NOTE(review): with hqq 0.1.8 and transformers 4.48.2 (both listed in the
# `pip list` output of this report) the from_quantized() call below raises
# "AttributeError: 'LlamaAttention' object has no attribute 'rotary_emb'".
# Presumably an hqq/transformers version mismatch -- TODO confirm.
###################################################
model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
# cache_dir='.' presumably stores the downloaded files in the working
# directory; the adapter argument names the LoRA file shipped with the
# checkpoint (it appears in the download log of this report).
model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Presumably applies patch_add_quant_config to every linear layer so each one
# carries the quantization settings below; nbits=2 / group_size=64 match the
# "2bitgs64" part of model_id -- TODO confirm against the hqq patching utils.
patch_linearlayers(model, patch_add_quant_config,
BaseQuantizeConfig(nbits=2, group_size=64, quant_scale=False, quant_zero=False, axis=1))
# Trailing semicolon is the notebook idiom for suppressing the cell's echo of
# the returned value.
model.eval();
# cleanup() comes from one of the hqq wildcard imports; presumably it frees
# cached memory -- verify in hqq.
cleanup()
/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning:
The secret HF_TOKEN
does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
warnings.warn(
Fetching 9 files: 100%
9/9 [01:43<00:00, 20.94s/it]
.gitattributes: 100%
1.70k/1.70k [00:00<00:00, 52.7kB/s]
README.md: 100%
4.13k/4.13k [00:00<00:00, 120kB/s]
adapter_v0.1.lora: 100%
83.0M/83.0M [00:02<00:00, 41.2MB/s]
llama3-2bit.gif: 100%
25.8M/25.8M [00:00<00:00, 35.6MB/s]
qmodel.pt: 100%
4.28G/4.28G [01:42<00:00, 47.9MB/s]
tokenizer.json: 100%
9.09M/9.09M [00:00<00:00, 28.4MB/s]
config.json: 100%
728/728 [00:00<00:00, 28.7kB/s]
special_tokens_map.json: 100%
296/296 [00:00<00:00, 6.14kB/s]
tokenizer_config.json: 100%
51.0k/51.0k [00:00<00:00, 1.45MB/s]
/usr/local/lib/python3.11/dist-packages/hqq/models/base.py:237: FutureWarning: You are using torch.load
with weights_only=False
(the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for weights_only
will be flipped to True
. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via torch.serialization.add_safe_globals
. We recommend you start setting weights_only=True
for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
return torch.load(cls.get_weight_file(save_dir), map_location=map_location)
0%| | 0/32 [00:00<?, ?it/s]
AttributeError Traceback (most recent call last)
in <cell line: 0>()
8 ###################################################
9 model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
---> 10 model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
11 tokenizer = AutoTokenizer.from_pretrained(model_id)
12
4 frames
/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
1929 if name in modules:
1930 return modules[name]
-> 1931 raise AttributeError(
1932 f"'{type(self).__name__}' object has no attribute '{name}'"
1933 )
AttributeError: 'LlamaAttention' object has no attribute 'rotary_emb'
!pip list
DEPRECATION: Loading egg at /usr/local/lib/python3.11/dist-packages/hqq_aten-0.0.0-py3.11-linux-x86_64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330
Package Version
absl-py 1.4.0
accelerate 1.2.1
aiohappyeyeballs 2.4.4
aiohttp 3.11.11
aiohttp-cors 0.7.0
aiosignal 1.3.2
airportsdata 20241001
alabaster 1.0.0
albucore 0.0.19
albumentations 1.4.20
ale-py 0.10.1
altair 5.5.0
annotated-types 0.7.0
anyio 3.7.1
argon2-cffi 23.1.0
argon2-cffi-bindings 21.2.0
array_record 0.6.0
arviz 0.20.0
astor 0.8.1
astropy 6.1.7
astropy-iers-data 0.2025.1.27.0.32.44
astunparse 1.6.3
atpublic 4.1.0
attrs 25.1.0
audioread 3.0.1
auto_gptq 0.7.1
autograd 1.7.0
babel 2.16.0
backcall 0.2.0
beautifulsoup4 4.12.3
bigframes 1.34.0
bigquery-magics 0.5.0
bitblas 0.1.0
blake3 1.0.4
bleach 6.2.0
blinker 1.9.0
blis 0.7.11
blosc2 3.0.0
bokeh 3.6.2
Bottleneck 1.4.2
bqplot 0.12.44
branca 0.8.1
CacheControl 0.14.2
cachetools 5.5.1
catalogue 2.0.10
certifi 2024.12.14
cffi 1.17.1
chardet 5.2.0
charset-normalizer 3.4.1
chex 0.1.88
clarabel 0.9.0
click 8.1.8
cloudpathlib 0.20.0
cloudpickle 3.1.1
cmake 3.31.4
cmdstanpy 1.2.5
colorcet 3.1.0
colorful 0.5.6
colorlover 0.3.0
colour 0.1.5
community 1.0.0b1
compressed-tensors 0.9.0
confection 0.1.5
cons 0.4.6
contourpy 1.3.1
cpplint 2.0.0
cramjam 2.9.1
cryptography 43.0.3
cuda-python 12.6.0
cudf-cu12 24.12.0
cufflinks 0.17.3
cupy-cuda12x 13.3.0
cvxopt 1.3.2
cvxpy 1.6.0
cycler 0.12.1
cyipopt 1.5.0
cymem 2.0.11
Cython 3.0.11
dask 2024.10.0
datascience 0.17.6
datasets 3.2.0
db-dtypes 1.4.0
dbus-python 1.2.18
debugpy 1.8.0
decorator 4.4.2
defusedxml 0.7.1
Deprecated 1.2.18
depyf 0.18.0
diffusers 0.32.2
dill 0.3.8
diskcache 5.6.3
distlib 0.3.9
distro 1.9.0
dlib 19.24.2
dm-tree 0.1.8
docker-pycreds 0.4.0
docstring_parser 0.16
docutils 0.21.2
dopamine_rl 4.1.2
dtlib 0.0.0.dev2
duckdb 1.1.3
earthengine-api 1.4.6
easydict 1.13
editdistance 0.8.1
eerepr 0.1.0
einops 0.8.0
en-core-web-sm 3.7.1
entrypoints 0.4
et_xmlfile 2.0.0
etils 1.11.0
etuples 0.3.9
eval_type_backport 0.2.2
execnet 2.1.1
Farama-Notifications 0.0.4
fastai 2.7.18
fastapi 0.115.8
fastcore 1.7.28
fastdownload 0.0.7
fastjsonschema 2.21.1
fastprogress 1.0.3
fastrlock 0.8.3
filelock 3.17.0
firebase-admin 6.6.0
Flask 3.1.0
flatbuffers 25.1.24
flax 0.10.2
folium 0.19.4
fonttools 4.55.7
frozendict 2.4.6
frozenlist 1.5.0
fsspec 2024.9.0
future 1.0.0
gast 0.6.0
gcsfs 2024.10.0
GDAL 3.6.4
gdown 5.2.0
geemap 0.35.1
gekko 1.2.1
gensim 4.3.3
geocoder 1.38.1
geographiclib 2.0
geopandas 1.0.1
geopy 2.4.1
gguf 0.10.0
gin-config 0.5.0
gitdb 4.0.12
GitPython 3.1.44
glob2 0.7
google 2.0.3
google-ai-generativelanguage 0.6.15
google-api-core 2.19.2
google-api-python-client 2.155.0
google-auth 2.27.0
google-auth-httplib2 0.2.0
google-auth-oauthlib 1.2.1
google-cloud-aiplatform 1.74.0
google-cloud-bigquery 3.25.0
google-cloud-bigquery-connection 1.17.0
google-cloud-bigquery-storage 2.27.0
google-cloud-bigtable 2.28.1
google-cloud-core 2.4.1
google-cloud-datastore 2.20.2
google-cloud-firestore 2.19.0
google-cloud-functions 1.19.0
google-cloud-iam 2.17.0
google-cloud-language 2.16.0
google-cloud-pubsub 2.25.0
google-cloud-resource-manager 1.14.0
google-cloud-spanner 3.51.0
google-cloud-storage 2.19.0
google-cloud-translate 3.19.0
google-colab 1.0.0
google-crc32c 1.6.0
google-genai 0.3.0
google-generativeai 0.8.4
google-pasta 0.2.0
google-resumable-media 2.7.2
googleapis-common-protos 1.66.0
googledrivedownloader 0.4
graphviz 0.20.3
greenlet 3.1.1
grpc-google-iam-v1 0.14.0
grpc-interceptor 0.15.4
grpcio 1.70.0
grpcio-status 1.62.3
gspread 6.1.4
gspread-dataframe 4.0.0
gym 0.25.2
gym-notices 0.0.8
gymnasium 1.0.0
h11 0.14.0
h5netcdf 1.5.0
h5py 3.12.1
highspy 1.9.0
holidays 0.65
holoviews 1.20.0
hqq 0.1.8
hqq_aten 0.0.0
hqq_aten 0.0.0
html5lib 1.1
httpcore 1.0.7
httpimport 1.4.0
httplib2 0.22.0
httptools 0.6.4
httpx 0.28.1
huggingface-hub 0.27.1
humanize 4.11.0
hyperopt 0.2.7
ibis-framework 9.2.0
idna 3.10
imageio 2.36.1
imageio-ffmpeg 0.6.0
imagesize 1.4.1
imbalanced-learn 0.13.0
imgaug 0.4.0
immutabledict 4.2.1
importlib_metadata 8.6.1
importlib_resources 6.5.2
imutils 0.5.4
inflect 7.5.0
iniconfig 2.0.0
intel-cmplr-lib-ur 2025.0.4
intel-openmp 2025.0.4
interegular 0.3.3
ipyevents 2.0.2
ipyfilechooser 0.6.0
ipykernel 5.5.6
ipyleaflet 0.19.2
ipyparallel 8.8.0
ipython 7.34.0
ipython-genutils 0.2.0
ipython-sql 0.5.0
ipytree 0.2.2
ipywidgets 7.7.1
itsdangerous 2.2.0
jax 0.4.33
jax-cuda12-pjrt 0.4.33
jax-cuda12-plugin 0.4.33
jaxlib 0.4.33
jeepney 0.7.1
jellyfish 1.1.0
jieba 0.42.1
Jinja2 3.1.5
jiter 0.8.2
joblib 1.4.2
jsonpatch 1.33
jsonpickle 4.0.1
jsonpointer 3.0.0
jsonschema 4.23.0
jsonschema-specifications 2024.10.1
jupyter-client 6.1.12
jupyter-console 6.1.0
jupyter_core 5.7.2
jupyter-leaflet 0.19.2
jupyter-server 1.24.0
jupyterlab_pygments 0.3.0
jupyterlab_widgets 3.0.13
kaggle 1.6.17
kagglehub 0.3.6
keras 3.8.0
keras-hub 0.18.1
keras-nlp 0.18.1
keyring 23.5.0
kiwisolver 1.4.8
langchain 0.3.16
langchain-core 0.3.32
langchain-text-splitters 0.3.5
langcodes 3.5.0
langsmith 0.3.2
language_data 1.3.0
lark 1.2.2
launchpadlib 1.10.16
lazr.restfulclient 0.14.4
lazr.uri 1.0.6
lazy_loader 0.4
libclang 18.1.1
libcudf-cu12 24.12.0
libkvikio-cu12 24.12.1
librosa 0.10.2.post1
lightgbm 4.5.0
linkify-it-py 2.0.3
llvmlite 0.43.0
lm-format-enforcer 0.10.9
locket 1.0.0
logical-unification 0.4.6
lxml 5.3.0
marisa-trie 1.2.1
Markdown 3.7
markdown-it-py 3.0.0
MarkupSafe 3.0.2
matplotlib 3.10.0
matplotlib-inline 0.1.7
matplotlib-venn 1.1.1
mdit-py-plugins 0.4.2
mdurl 0.1.2
miniKanren 1.0.3
missingno 0.5.2
mistral_common 1.5.2
mistune 3.1.1
mizani 0.13.1
mkl 2025.0.1
ml-dtypes 0.4.1
mlxtend 0.23.4
more-itertools 10.5.0
moviepy 1.0.3
mpmath 1.3.0
msgpack 1.1.0
msgspec 0.19.0
multidict 6.1.0
multipledispatch 1.0.0
multiprocess 0.70.16
multitasking 0.0.11
murmurhash 1.0.12
music21 9.3.0
namex 0.0.8
narwhals 1.24.1
natsort 8.4.0
nbclassic 1.2.0
nbclient 0.10.2
nbconvert 7.16.6
nbformat 5.10.4
ndindex 1.9.2
nest-asyncio 1.6.0
networkx 3.4.2
nibabel 5.3.2
nltk 3.9.1
notebook 6.5.5
notebook_shim 0.2.4
numba 0.60.0
numba-cuda 0.0.17.1
numexpr 2.10.2
numpy 1.26.4
nvidia-cublas-cu12 12.4.5.8
nvidia-cuda-cupti-cu12 12.4.127
nvidia-cuda-nvcc-cu12 12.5.82
nvidia-cuda-nvrtc-cu12 12.4.127
nvidia-cuda-runtime-cu12 12.4.127
nvidia-cudnn-cu12 9.1.0.70
nvidia-cufft-cu12 11.2.1.3
nvidia-curand-cu12 10.3.5.147
nvidia-cusolver-cu12 11.6.1.9
nvidia-cusparse-cu12 12.3.1.170
nvidia-ml-py 12.570.86
nvidia-nccl-cu12 2.21.5
nvidia-nvcomp-cu12 4.1.0.6
nvidia-nvjitlink-cu12 12.4.127
nvidia-nvtx-cu12 12.4.127
nvtx 0.2.10
nx-cugraph-cu12 24.12.0
oauth2client 4.1.3
oauthlib 3.2.2
openai 1.59.9
opencensus 0.11.4
opencensus-context 0.1.3
opencv-contrib-python 4.10.0.84
opencv-python 4.10.0.84
opencv-python-headless 4.11.0.86
openpyxl 3.1.5
opentelemetry-api 1.16.0
opentelemetry-sdk 1.16.0
opentelemetry-semantic-conventions 0.37b0
opt_einsum 3.4.0
optax 0.2.4
optree 0.14.0
orbax-checkpoint 0.6.4
orjson 3.10.15
osqp 0.6.7.post3
outlines 0.1.11
outlines_core 0.1.26
packaging 24.2
pandas 2.2.2
pandas-datareader 0.10.0
pandas-gbq 0.26.1
pandas-stubs 2.2.2.240909
pandocfilters 1.5.1
panel 1.6.0
param 2.2.0
parso 0.8.4
parsy 2.1
partd 1.4.2
partial-json-parser 0.2.1.1.post5
pathlib 1.0.1
patsy 1.0.1
peewee 3.17.8
peft 0.14.0
pexpect 4.9.0
pickleshare 0.7.5
pillow 10.4.0
pip 24.1.2
platformdirs 4.3.6
plotly 5.24.1
plotnine 0.14.5
pluggy 1.5.0
ply 3.11
polars 1.9.0
pooch 1.8.2
portpicker 1.5.2
preshed 3.0.9
prettytable 3.13.0
proglog 0.1.10
progressbar2 4.5.0
prometheus_client 0.21.1
prometheus-fastapi-instrumentator 7.0.2
promise 2.3
prompt_toolkit 3.0.50
propcache 0.2.1
prophet 1.1.6
proto-plus 1.26.0
protobuf 4.25.6
psutil 5.9.5
psycopg2 2.9.10
ptyprocess 0.7.0
py-cpuinfo 9.0.0
py-spy 0.4.0
py4j 0.10.9.7
pyarrow 17.0.0
pyasn1 0.6.1
pyasn1_modules 0.4.1
pybind11 2.13.6
pycocotools 2.0.8
pycountry 24.6.1
pycparser 2.22
pydantic 2.10.6
pydantic_core 2.27.2
pydata-google-auth 1.9.1
pydot 3.0.4
pydotplus 2.0.2
PyDrive 1.3.1
PyDrive2 1.21.3
pyerfa 2.0.1.5
pygame 2.6.1
pygit2 1.16.0
Pygments 2.18.0
PyGObject 3.42.1
PyJWT 2.10.1
pylibcudf-cu12 24.12.0
pylibcugraph-cu12 24.12.0
pylibraft-cu12 24.12.0
pymc 5.19.1
pymystem3 0.2.0
pynvjitlink-cu12 0.5.0
pyogrio 0.10.0
Pyomo 6.8.2
PyOpenGL 3.1.9
pyOpenSSL 24.2.1
pyparsing 3.2.1
pyperclip 1.9.0
pyproj 3.7.0
pyshp 2.3.1
PySocks 1.7.1
pyspark 3.5.4
pytensor 2.26.4
pytest 8.3.4
pytest-xdist 3.6.1
python-apt 0.0.0
python-box 7.3.2
python-dateutil 2.8.2
python-dotenv 1.0.1
python-louvain 0.16
python-slugify 8.0.4
python-snappy 0.7.3
python-utils 3.9.1
pytz 2024.2
pyviz_comms 3.0.4
PyYAML 6.0.2
pyzmq 24.0.1
qdldl 0.1.7.post5
RapidFuzz 3.12.1
ratelim 0.1.6
ray 2.42.0
referencing 0.36.2
regex 2024.11.6
requests 2.32.3
requests-oauthlib 1.3.1
requests-toolbelt 1.0.0
requirements-parser 0.9.0
rich 13.9.4
rmm-cu12 24.12.1
rouge 1.0.1
rpds-py 0.22.3
rpy2 3.4.2
rsa 4.9
safetensors 0.5.2
scikit-image 0.25.1
scikit-learn 1.6.1
scipy 1.13.1
scooby 0.10.0
scs 3.2.7.post2
seaborn 0.13.2
SecretStorage 3.3.1
Send2Trash 1.8.3
sentence-transformers 3.3.1
sentencepiece 0.2.0
sentry-sdk 2.20.0
setproctitle 1.3.4
setuptools 75.1.0
shap 0.46.0
shapely 2.0.6
shellingham 1.5.4
simple-parsing 0.1.7
six 1.17.0
sklearn-compat 0.1.3
sklearn-pandas 2.2.0
slicer 0.0.8
smart-open 7.1.0
smmap 5.0.2
sniffio 1.3.1
snowballstemmer 2.2.0
soundfile 0.13.1
soupsieve 2.6
soxr 0.5.0.post1
spacy 3.7.5
spacy-legacy 3.0.12
spacy-loggers 1.0.5
spanner-graph-notebook 1.0.9
Sphinx 8.1.3
sphinxcontrib-applehelp 2.0.0
sphinxcontrib-devhelp 2.0.0
sphinxcontrib-htmlhelp 2.1.0
sphinxcontrib-jsmath 1.0.1
sphinxcontrib-qthelp 2.0.0
sphinxcontrib-serializinghtml 2.0.0
SQLAlchemy 2.0.37
sqlglot 25.6.1
sqlparse 0.5.3
srsly 2.5.1
stanio 0.5.1
starlette 0.45.3
statsmodels 0.14.4
stringzilla 3.11.3
sympy 1.13.1
tables 3.10.2
tabulate 0.9.0
tbb 2022.0.0
tcmlib 1.2.0
tenacity 9.0.0
tensorboard 2.18.0
tensorboard-data-server 0.7.2
tensorflow 2.18.0
tensorflow-datasets 4.9.7
tensorflow-hub 0.16.1
tensorflow-io-gcs-filesystem 0.37.1
tensorflow-metadata 1.16.1
tensorflow-probability 0.24.0
tensorflow-text 2.18.1
tensorstore 0.1.71
termcolor 2.5.0
terminado 0.18.1
text-unidecode 1.3
textblob 0.17.1
tf_keras 2.18.0
tf-slim 1.1.0
thefuzz 0.22.1
thinc 8.2.5
threadpoolctl 3.5.0
tifffile 2025.1.10
tiktoken 0.7.0
timm 1.0.14
tinycss2 1.4.0
tokenizers 0.21.0
toml 0.10.2
toolz 0.12.1
torch 2.5.1+cu124
torchaudio 2.5.1+cu124
torchsummary 1.5.1
torchvision 0.20.1+cu124
tornado 6.4.2
tqdm 4.67.1
traitlets 5.7.1
traittypes 0.2.1
transformers 4.48.2
triton 3.1.0
tweepy 4.14.0
typeguard 4.4.1
typer 0.15.1
types-pytz 2024.2.0.20241221
types-setuptools 75.8.0.20250110
typing_extensions 4.12.2
tzdata 2025.1
tzlocal 5.2
uc-micro-py 1.0.3
umf 0.9.1
uritemplate 4.1.1
urllib3 2.3.0
uvicorn 0.34.0
uvloop 0.21.0
vega-datasets 0.9.0
virtualenv 20.29.1
vllm 0.7.1
wadllib 1.3.6
wandb 0.19.5
wasabi 1.1.3
watchfiles 1.0.4
wcwidth 0.2.13
weasel 0.4.1
webcolors 24.11.1
webencodings 0.5.1
websocket-client 1.8.0
websockets 14.2
Werkzeug 3.1.3
wheel 0.45.1
widgetsnbextension 3.6.10
wordcloud 1.9.4
wrapt 1.17.2
xarray 2025.1.1
xarray-einstats 0.8.0
xformers 0.0.28.post3
xgboost 2.1.3
xgrammar 0.1.11
xlrd 2.0.1
xxhash 3.5.0
xyzservices 2025.1.0
yarl 1.18.3
yellowbrick 1.5
yfinance 0.2.52
zipp 3.21.0
zstandard 0.23.0
Environment: Google Colab with a T4 GPU.
import torch
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.core.quantize import *
from hqq.utils.patching import *
from hqq.utils.generation_hf import HFGenerator
# Load the model
###################################################
# NOTE(review): with hqq 0.1.8 and transformers 4.48.2 (both listed in the
# `pip list` output of this report) the from_quantized() call below raises
# "AttributeError: 'LlamaAttention' object has no attribute 'rotary_emb'".
# Presumably an hqq/transformers version mismatch -- TODO confirm.
model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Presumably attaches the quantization settings to every linear layer;
# nbits=2 / group_size=64 match the "2bitgs64" part of model_id -- TODO confirm.
patch_linearlayers(
    model,
    patch_add_quant_config,
    BaseQuantizeConfig(nbits=2, group_size=64, quant_scale=False, quant_zero=False, axis=1),
)
model.eval();
cleanup()

# Use optimized inference kernels
###################################################
HQQLinear.set_backend(HQQBackend.PYTORCH)
# Alternative (left disabled): prepare the model with the default backend.
# prepare_for_inference(model)  # default backend
prepare_for_inference(model, backend="bitblas", allow_merge=False)  # It takes a while...

# Generate
###################################################
# For longer context, make sure to allocate enough cache via the cache_size= parameter
# Alternative (left disabled): slower generation but no warm-up.
# gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile=None)
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup()  # Faster generation, but warm-up takes a while

gen.generate("Write an essay about large language models", print_tokens=True)
gen.generate("Tell me a funny joke!", print_tokens=True)
gen.generate("How to make a yummy chocolate cake?", print_tokens=True)
/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning:
The secret HF_TOKEN
does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
warnings.warn(
Fetching 9 files: 100%
9/9 [00:00<00:00, 311.93it/s]
/usr/local/lib/python3.11/dist-packages/hqq/models/base.py:237: FutureWarning: You are using torch.load
with weights_only=False
(the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for weights_only
will be flipped to True
. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via torch.serialization.add_safe_globals
. We recommend you start setting weights_only=True
for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
return torch.load(cls.get_weight_file(save_dir), map_location=map_location)
0%| | 0/32 [00:00<?, ?it/s]
AttributeError Traceback (most recent call last)
in <cell line: 0>()
8 ###################################################
9 model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
---> 10 model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
11 tokenizer = AutoTokenizer.from_pretrained(model_id)
12
4 frames
/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
1727 if name in modules:
1728 return modules[name]
-> 1729 raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
1730
1731 def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:
AttributeError: 'LlamaAttention' object has no attribute 'rotary_emb'
Same comment as before: make sure you use an updated version of transformers and a compatible GPU