Text Generation
Transformers

Error

#2
by rakmik - opened

!pip install hqq==0.1.8
!pip install bitblas
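
Before loading anything, it is worth confirming that the runtime actually exposes a CUDA GPU, since the 2-bit HQQ model and the bitblas backend below both assume one. A minimal sanity-check sketch using standard torch calls:

import torch

# Confirm a CUDA device is visible and identify it; a T4 reports compute capability (7, 5).
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("Compute capability:", torch.cuda.get_device_capability(0))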

import torch
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.core.quantize import *
from hqq.utils.patching import *
from hqq.utils.generation_hf import HFGenerator

#Load the model
###################################################
model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
tokenizer = AutoTokenizer.from_pretrained(model_id)

patch_linearlayers(model, patch_add_quant_config,
                   BaseQuantizeConfig(nbits=2, group_size=64, quant_scale=False, quant_zero=False, axis=1))

model.eval();
cleanup()

/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning:
The secret HF_TOKEN does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
warnings.warn(
Fetching 9 files: 100% 9/9 [01:43<00:00, 20.94s/it]
.gitattributes: 100% 1.70k/1.70k [00:00<00:00, 52.7kB/s]
README.md: 100% 4.13k/4.13k [00:00<00:00, 120kB/s]
adapter_v0.1.lora: 100% 83.0M/83.0M [00:02<00:00, 41.2MB/s]
llama3-2bit.gif: 100% 25.8M/25.8M [00:00<00:00, 35.6MB/s]
qmodel.pt: 100% 4.28G/4.28G [01:42<00:00, 47.9MB/s]
tokenizer.json: 100% 9.09M/9.09M [00:00<00:00, 28.4MB/s]
config.json: 100% 728/728 [00:00<00:00, 28.7kB/s]
special_tokens_map.json: 100% 296/296 [00:00<00:00, 6.14kB/s]
tokenizer_config.json: 100% 51.0k/51.0k [00:00<00:00, 1.45MB/s]
/usr/local/lib/python3.11/dist-packages/hqq/models/base.py:237: FutureWarning: You are using torch.load with weights_only=False (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for weights_only will be flipped to True. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via torch.serialization.add_safe_globals. We recommend you start setting weights_only=True for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
return torch.load(cls.get_weight_file(save_dir), map_location=map_location)
0%| | 0/32 [00:00<?, ?it/s]

AttributeError                            Traceback (most recent call last)
in <cell line: 0>()
      8 ###################################################
      9 model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
---> 10 model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
     11 tokenizer = AutoTokenizer.from_pretrained(model_id)
     12

4 frames
/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
   1929         if name in modules:
   1930             return modules[name]
-> 1931         raise AttributeError(
   1932             f"'{type(self).__name__}' object has no attribute '{name}'"
   1933         )

AttributeError: 'LlamaAttention' object has no attribute 'rotary_emb'

!pip list

DEPRECATION: Loading egg at /usr/local/lib/python3.11/dist-packages/hqq_aten-0.0.0-py3.11-linux-x86_64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330
Package Version


absl-py 1.4.0
accelerate 1.2.1
aiohappyeyeballs 2.4.4
aiohttp 3.11.11
aiohttp-cors 0.7.0
aiosignal 1.3.2
airportsdata 20241001
alabaster 1.0.0
albucore 0.0.19
albumentations 1.4.20
ale-py 0.10.1
altair 5.5.0
annotated-types 0.7.0
anyio 3.7.1
argon2-cffi 23.1.0
argon2-cffi-bindings 21.2.0
array_record 0.6.0
arviz 0.20.0
astor 0.8.1
astropy 6.1.7
astropy-iers-data 0.2025.1.27.0.32.44
astunparse 1.6.3
atpublic 4.1.0
attrs 25.1.0
audioread 3.0.1
auto_gptq 0.7.1
autograd 1.7.0
babel 2.16.0
backcall 0.2.0
beautifulsoup4 4.12.3
bigframes 1.34.0
bigquery-magics 0.5.0
bitblas 0.1.0
blake3 1.0.4
bleach 6.2.0
blinker 1.9.0
blis 0.7.11
blosc2 3.0.0
bokeh 3.6.2
Bottleneck 1.4.2
bqplot 0.12.44
branca 0.8.1
CacheControl 0.14.2
cachetools 5.5.1
catalogue 2.0.10
certifi 2024.12.14
cffi 1.17.1
chardet 5.2.0
charset-normalizer 3.4.1
chex 0.1.88
clarabel 0.9.0
click 8.1.8
cloudpathlib 0.20.0
cloudpickle 3.1.1
cmake 3.31.4
cmdstanpy 1.2.5
colorcet 3.1.0
colorful 0.5.6
colorlover 0.3.0
colour 0.1.5
community 1.0.0b1
compressed-tensors 0.9.0
confection 0.1.5
cons 0.4.6
contourpy 1.3.1
cpplint 2.0.0
cramjam 2.9.1
cryptography 43.0.3
cuda-python 12.6.0
cudf-cu12 24.12.0
cufflinks 0.17.3
cupy-cuda12x 13.3.0
cvxopt 1.3.2
cvxpy 1.6.0
cycler 0.12.1
cyipopt 1.5.0
cymem 2.0.11
Cython 3.0.11
dask 2024.10.0
datascience 0.17.6
datasets 3.2.0
db-dtypes 1.4.0
dbus-python 1.2.18
debugpy 1.8.0
decorator 4.4.2
defusedxml 0.7.1
Deprecated 1.2.18
depyf 0.18.0
diffusers 0.32.2
dill 0.3.8
diskcache 5.6.3
distlib 0.3.9
distro 1.9.0
dlib 19.24.2
dm-tree 0.1.8
docker-pycreds 0.4.0
docstring_parser 0.16
docutils 0.21.2
dopamine_rl 4.1.2
dtlib 0.0.0.dev2
duckdb 1.1.3
earthengine-api 1.4.6
easydict 1.13
editdistance 0.8.1
eerepr 0.1.0
einops 0.8.0
en-core-web-sm 3.7.1
entrypoints 0.4
et_xmlfile 2.0.0
etils 1.11.0
etuples 0.3.9
eval_type_backport 0.2.2
execnet 2.1.1
Farama-Notifications 0.0.4
fastai 2.7.18
fastapi 0.115.8
fastcore 1.7.28
fastdownload 0.0.7
fastjsonschema 2.21.1
fastprogress 1.0.3
fastrlock 0.8.3
filelock 3.17.0
firebase-admin 6.6.0
Flask 3.1.0
flatbuffers 25.1.24
flax 0.10.2
folium 0.19.4
fonttools 4.55.7
frozendict 2.4.6
frozenlist 1.5.0
fsspec 2024.9.0
future 1.0.0
gast 0.6.0
gcsfs 2024.10.0
GDAL 3.6.4
gdown 5.2.0
geemap 0.35.1
gekko 1.2.1
gensim 4.3.3
geocoder 1.38.1
geographiclib 2.0
geopandas 1.0.1
geopy 2.4.1
gguf 0.10.0
gin-config 0.5.0
gitdb 4.0.12
GitPython 3.1.44
glob2 0.7
google 2.0.3
google-ai-generativelanguage 0.6.15
google-api-core 2.19.2
google-api-python-client 2.155.0
google-auth 2.27.0
google-auth-httplib2 0.2.0
google-auth-oauthlib 1.2.1
google-cloud-aiplatform 1.74.0
google-cloud-bigquery 3.25.0
google-cloud-bigquery-connection 1.17.0
google-cloud-bigquery-storage 2.27.0
google-cloud-bigtable 2.28.1
google-cloud-core 2.4.1
google-cloud-datastore 2.20.2
google-cloud-firestore 2.19.0
google-cloud-functions 1.19.0
google-cloud-iam 2.17.0
google-cloud-language 2.16.0
google-cloud-pubsub 2.25.0
google-cloud-resource-manager 1.14.0
google-cloud-spanner 3.51.0
google-cloud-storage 2.19.0
google-cloud-translate 3.19.0
google-colab 1.0.0
google-crc32c 1.6.0
google-genai 0.3.0
google-generativeai 0.8.4
google-pasta 0.2.0
google-resumable-media 2.7.2
googleapis-common-protos 1.66.0
googledrivedownloader 0.4
graphviz 0.20.3
greenlet 3.1.1
grpc-google-iam-v1 0.14.0
grpc-interceptor 0.15.4
grpcio 1.70.0
grpcio-status 1.62.3
gspread 6.1.4
gspread-dataframe 4.0.0
gym 0.25.2
gym-notices 0.0.8
gymnasium 1.0.0
h11 0.14.0
h5netcdf 1.5.0
h5py 3.12.1
highspy 1.9.0
holidays 0.65
holoviews 1.20.0
hqq 0.1.8
hqq_aten 0.0.0
hqq_aten 0.0.0
html5lib 1.1
httpcore 1.0.7
httpimport 1.4.0
httplib2 0.22.0
httptools 0.6.4
httpx 0.28.1
huggingface-hub 0.27.1
humanize 4.11.0
hyperopt 0.2.7
ibis-framework 9.2.0
idna 3.10
imageio 2.36.1
imageio-ffmpeg 0.6.0
imagesize 1.4.1
imbalanced-learn 0.13.0
imgaug 0.4.0
immutabledict 4.2.1
importlib_metadata 8.6.1
importlib_resources 6.5.2
imutils 0.5.4
inflect 7.5.0
iniconfig 2.0.0
intel-cmplr-lib-ur 2025.0.4
intel-openmp 2025.0.4
interegular 0.3.3
ipyevents 2.0.2
ipyfilechooser 0.6.0
ipykernel 5.5.6
ipyleaflet 0.19.2
ipyparallel 8.8.0
ipython 7.34.0
ipython-genutils 0.2.0
ipython-sql 0.5.0
ipytree 0.2.2
ipywidgets 7.7.1
itsdangerous 2.2.0
jax 0.4.33
jax-cuda12-pjrt 0.4.33
jax-cuda12-plugin 0.4.33
jaxlib 0.4.33
jeepney 0.7.1
jellyfish 1.1.0
jieba 0.42.1
Jinja2 3.1.5
jiter 0.8.2
joblib 1.4.2
jsonpatch 1.33
jsonpickle 4.0.1
jsonpointer 3.0.0
jsonschema 4.23.0
jsonschema-specifications 2024.10.1
jupyter-client 6.1.12
jupyter-console 6.1.0
jupyter_core 5.7.2
jupyter-leaflet 0.19.2
jupyter-server 1.24.0
jupyterlab_pygments 0.3.0
jupyterlab_widgets 3.0.13
kaggle 1.6.17
kagglehub 0.3.6
keras 3.8.0
keras-hub 0.18.1
keras-nlp 0.18.1
keyring 23.5.0
kiwisolver 1.4.8
langchain 0.3.16
langchain-core 0.3.32
langchain-text-splitters 0.3.5
langcodes 3.5.0
langsmith 0.3.2
language_data 1.3.0
lark 1.2.2
launchpadlib 1.10.16
lazr.restfulclient 0.14.4
lazr.uri 1.0.6
lazy_loader 0.4
libclang 18.1.1
libcudf-cu12 24.12.0
libkvikio-cu12 24.12.1
librosa 0.10.2.post1
lightgbm 4.5.0
linkify-it-py 2.0.3
llvmlite 0.43.0
lm-format-enforcer 0.10.9
locket 1.0.0
logical-unification 0.4.6
lxml 5.3.0
marisa-trie 1.2.1
Markdown 3.7
markdown-it-py 3.0.0
MarkupSafe 3.0.2
matplotlib 3.10.0
matplotlib-inline 0.1.7
matplotlib-venn 1.1.1
mdit-py-plugins 0.4.2
mdurl 0.1.2
miniKanren 1.0.3
missingno 0.5.2
mistral_common 1.5.2
mistune 3.1.1
mizani 0.13.1
mkl 2025.0.1
ml-dtypes 0.4.1
mlxtend 0.23.4
more-itertools 10.5.0
moviepy 1.0.3
mpmath 1.3.0
msgpack 1.1.0
msgspec 0.19.0
multidict 6.1.0
multipledispatch 1.0.0
multiprocess 0.70.16
multitasking 0.0.11
murmurhash 1.0.12
music21 9.3.0
namex 0.0.8
narwhals 1.24.1
natsort 8.4.0
nbclassic 1.2.0
nbclient 0.10.2
nbconvert 7.16.6
nbformat 5.10.4
ndindex 1.9.2
nest-asyncio 1.6.0
networkx 3.4.2
nibabel 5.3.2
nltk 3.9.1
notebook 6.5.5
notebook_shim 0.2.4
numba 0.60.0
numba-cuda 0.0.17.1
numexpr 2.10.2
numpy 1.26.4
nvidia-cublas-cu12 12.4.5.8
nvidia-cuda-cupti-cu12 12.4.127
nvidia-cuda-nvcc-cu12 12.5.82
nvidia-cuda-nvrtc-cu12 12.4.127
nvidia-cuda-runtime-cu12 12.4.127
nvidia-cudnn-cu12 9.1.0.70
nvidia-cufft-cu12 11.2.1.3
nvidia-curand-cu12 10.3.5.147
nvidia-cusolver-cu12 11.6.1.9
nvidia-cusparse-cu12 12.3.1.170
nvidia-ml-py 12.570.86
nvidia-nccl-cu12 2.21.5
nvidia-nvcomp-cu12 4.1.0.6
nvidia-nvjitlink-cu12 12.4.127
nvidia-nvtx-cu12 12.4.127
nvtx 0.2.10
nx-cugraph-cu12 24.12.0
oauth2client 4.1.3
oauthlib 3.2.2
openai 1.59.9
opencensus 0.11.4
opencensus-context 0.1.3
opencv-contrib-python 4.10.0.84
opencv-python 4.10.0.84
opencv-python-headless 4.11.0.86
openpyxl 3.1.5
opentelemetry-api 1.16.0
opentelemetry-sdk 1.16.0
opentelemetry-semantic-conventions 0.37b0
opt_einsum 3.4.0
optax 0.2.4
optree 0.14.0
orbax-checkpoint 0.6.4
orjson 3.10.15
osqp 0.6.7.post3
outlines 0.1.11
outlines_core 0.1.26
packaging 24.2
pandas 2.2.2
pandas-datareader 0.10.0
pandas-gbq 0.26.1
pandas-stubs 2.2.2.240909
pandocfilters 1.5.1
panel 1.6.0
param 2.2.0
parso 0.8.4
parsy 2.1
partd 1.4.2
partial-json-parser 0.2.1.1.post5
pathlib 1.0.1
patsy 1.0.1
peewee 3.17.8
peft 0.14.0
pexpect 4.9.0
pickleshare 0.7.5
pillow 10.4.0
pip 24.1.2
platformdirs 4.3.6
plotly 5.24.1
plotnine 0.14.5
pluggy 1.5.0
ply 3.11
polars 1.9.0
pooch 1.8.2
portpicker 1.5.2
preshed 3.0.9
prettytable 3.13.0
proglog 0.1.10
progressbar2 4.5.0
prometheus_client 0.21.1
prometheus-fastapi-instrumentator 7.0.2
promise 2.3
prompt_toolkit 3.0.50
propcache 0.2.1
prophet 1.1.6
proto-plus 1.26.0
protobuf 4.25.6
psutil 5.9.5
psycopg2 2.9.10
ptyprocess 0.7.0
py-cpuinfo 9.0.0
py-spy 0.4.0
py4j 0.10.9.7
pyarrow 17.0.0
pyasn1 0.6.1
pyasn1_modules 0.4.1
pybind11 2.13.6
pycocotools 2.0.8
pycountry 24.6.1
pycparser 2.22
pydantic 2.10.6
pydantic_core 2.27.2
pydata-google-auth 1.9.1
pydot 3.0.4
pydotplus 2.0.2
PyDrive 1.3.1
PyDrive2 1.21.3
pyerfa 2.0.1.5
pygame 2.6.1
pygit2 1.16.0
Pygments 2.18.0
PyGObject 3.42.1
PyJWT 2.10.1
pylibcudf-cu12 24.12.0
pylibcugraph-cu12 24.12.0
pylibraft-cu12 24.12.0
pymc 5.19.1
pymystem3 0.2.0
pynvjitlink-cu12 0.5.0
pyogrio 0.10.0
Pyomo 6.8.2
PyOpenGL 3.1.9
pyOpenSSL 24.2.1
pyparsing 3.2.1
pyperclip 1.9.0
pyproj 3.7.0
pyshp 2.3.1
PySocks 1.7.1
pyspark 3.5.4
pytensor 2.26.4
pytest 8.3.4
pytest-xdist 3.6.1
python-apt 0.0.0
python-box 7.3.2
python-dateutil 2.8.2
python-dotenv 1.0.1
python-louvain 0.16
python-slugify 8.0.4
python-snappy 0.7.3
python-utils 3.9.1
pytz 2024.2
pyviz_comms 3.0.4
PyYAML 6.0.2
pyzmq 24.0.1
qdldl 0.1.7.post5
RapidFuzz 3.12.1
ratelim 0.1.6
ray 2.42.0
referencing 0.36.2
regex 2024.11.6
requests 2.32.3
requests-oauthlib 1.3.1
requests-toolbelt 1.0.0
requirements-parser 0.9.0
rich 13.9.4
rmm-cu12 24.12.1
rouge 1.0.1
rpds-py 0.22.3
rpy2 3.4.2
rsa 4.9
safetensors 0.5.2
scikit-image 0.25.1
scikit-learn 1.6.1
scipy 1.13.1
scooby 0.10.0
scs 3.2.7.post2
seaborn 0.13.2
SecretStorage 3.3.1
Send2Trash 1.8.3
sentence-transformers 3.3.1
sentencepiece 0.2.0
sentry-sdk 2.20.0
setproctitle 1.3.4
setuptools 75.1.0
shap 0.46.0
shapely 2.0.6
shellingham 1.5.4
simple-parsing 0.1.7
six 1.17.0
sklearn-compat 0.1.3
sklearn-pandas 2.2.0
slicer 0.0.8
smart-open 7.1.0
smmap 5.0.2
sniffio 1.3.1
snowballstemmer 2.2.0
soundfile 0.13.1
soupsieve 2.6
soxr 0.5.0.post1
spacy 3.7.5
spacy-legacy 3.0.12
spacy-loggers 1.0.5
spanner-graph-notebook 1.0.9
Sphinx 8.1.3
sphinxcontrib-applehelp 2.0.0
sphinxcontrib-devhelp 2.0.0
sphinxcontrib-htmlhelp 2.1.0
sphinxcontrib-jsmath 1.0.1
sphinxcontrib-qthelp 2.0.0
sphinxcontrib-serializinghtml 2.0.0
SQLAlchemy 2.0.37
sqlglot 25.6.1
sqlparse 0.5.3
srsly 2.5.1
stanio 0.5.1
starlette 0.45.3
statsmodels 0.14.4
stringzilla 3.11.3
sympy 1.13.1
tables 3.10.2
tabulate 0.9.0
tbb 2022.0.0
tcmlib 1.2.0
tenacity 9.0.0
tensorboard 2.18.0
tensorboard-data-server 0.7.2
tensorflow 2.18.0
tensorflow-datasets 4.9.7
tensorflow-hub 0.16.1
tensorflow-io-gcs-filesystem 0.37.1
tensorflow-metadata 1.16.1
tensorflow-probability 0.24.0
tensorflow-text 2.18.1
tensorstore 0.1.71
termcolor 2.5.0
terminado 0.18.1
text-unidecode 1.3
textblob 0.17.1
tf_keras 2.18.0
tf-slim 1.1.0
thefuzz 0.22.1
thinc 8.2.5
threadpoolctl 3.5.0
tifffile 2025.1.10
tiktoken 0.7.0
timm 1.0.14
tinycss2 1.4.0
tokenizers 0.21.0
toml 0.10.2
toolz 0.12.1
torch 2.5.1+cu124
torchaudio 2.5.1+cu124
torchsummary 1.5.1
torchvision 0.20.1+cu124
tornado 6.4.2
tqdm 4.67.1
traitlets 5.7.1
traittypes 0.2.1
transformers 4.48.2
triton 3.1.0
tweepy 4.14.0
typeguard 4.4.1
typer 0.15.1
types-pytz 2024.2.0.20241221
types-setuptools 75.8.0.20250110
typing_extensions 4.12.2
tzdata 2025.1
tzlocal 5.2
uc-micro-py 1.0.3
umf 0.9.1
uritemplate 4.1.1
urllib3 2.3.0
uvicorn 0.34.0
uvloop 0.21.0
vega-datasets 0.9.0
virtualenv 20.29.1
vllm 0.7.1
wadllib 1.3.6
wandb 0.19.5
wasabi 1.1.3
watchfiles 1.0.4
wcwidth 0.2.13
weasel 0.4.1
webcolors 24.11.1
webencodings 0.5.1
websocket-client 1.8.0
websockets 14.2
Werkzeug 3.1.3
wheel 0.45.1
widgetsnbextension 3.6.10
wordcloud 1.9.4
wrapt 1.17.2
xarray 2025.1.1
xarray-einstats 0.8.0
xformers 0.0.28.post3
xgboost 2.1.3
xgrammar 0.1.11
xlrd 2.0.1
xxhash 3.5.0
xyzservices 2025.1.0
yarl 1.18.3
yellowbrick 1.5
yfinance 0.2.52
zipp 3.21.0
zstandard 0.23.0

Running on a Colab T4.

import torch
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.core.quantize import *
from hqq.utils.patching import *
from hqq.utils.generation_hf import HFGenerator

#Load the model

###################################################
model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
tokenizer = AutoTokenizer.from_pretrained(model_id)

patch_linearlayers(model, patch_add_quant_config,
                   BaseQuantizeConfig(nbits=2, group_size=64, quant_scale=False, quant_zero=False, axis=1))

model.eval();
cleanup()

#Use optimized inference kernels

###################################################
HQQLinear.set_backend(HQQBackend.PYTORCH)

prepare_for_inference(model) #default backend

prepare_for_inference(model, backend="bitblas", allow_merge=False) #It takes a while...

#Generate

###################################################

#For longer context, make sure to allocate enough cache via the cache_size= parameter (see the sketch after the generate calls below)

gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile=None) #Slower generation but no warm-up

gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Faster generation, but warm-up takes a while

gen.generate("Write an essay about large language models", print_tokens=True)
gen.generate("Tell me a funny joke!", print_tokens=True)
gen.generate("How to make a yummy chocolate cake?", print_tokens=True)

/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning:
The secret HF_TOKEN does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
warnings.warn(
Fetching 9 files: 100% 9/9 [00:00<00:00, 311.93it/s]
/usr/local/lib/python3.11/dist-packages/hqq/models/base.py:237: FutureWarning: You are using torch.load with weights_only=False (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for weights_only will be flipped to True. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via torch.serialization.add_safe_globals. We recommend you start setting weights_only=True for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
return torch.load(cls.get_weight_file(save_dir), map_location=map_location)
0%| | 0/32 [00:00<?, ?it/s]

AttributeError                            Traceback (most recent call last)
in <cell line: 0>()
      8 ###################################################
      9 model_id = 'mobiuslabsgmbh/Llama-3-8b-instruct_2bitgs64_hqq'
---> 10 model = HQQModelForCausalLM.from_quantized(model_id, cache_dir='.', compute_dtype=torch.float16, adapter='adapter_v0.1.lora')
     11 tokenizer = AutoTokenizer.from_pretrained(model_id)
     12

4 frames
/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
   1727         if name in modules:
   1728             return modules[name]
-> 1729         raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
   1730
   1731     def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:

AttributeError: 'LlamaAttention' object has no attribute 'rotary_emb'

Mobius Labs GmbH org

Same comment as before: make sure you use an updated version of transformers and a compatible GPU.
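
For reference, a sketch of the kind of check that advice points to, printing the currently installed versions before retrying (no specific transformers version is pinned here, since the thread does not name one):

import torch, transformers
from importlib.metadata import version

# Report the versions relevant to this error, then upgrade transformers and restart the runtime.
print("transformers:", transformers.__version__)
print("torch:", torch.__version__)
print("hqq:", version("hqq"))
# !pip install -U transformers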
