## **Step 1: Setting Up ExecuTorch**

* If you are running this notebook on Google Colab, select a High-RAM runtime before you start.


In [None]:
! touch /content/executorch; rm -rf /content/executorch
! git clone https://github.com/pytorch/executorch ; cd /content/executorch; git submodule sync ; git submodule update --init

In [2]:
import sys
# This is a workaround for now
!mkdir -p /usr/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/torchgen/packaged/ATen/native/
!cp /usr/local/lib/python{sys.version_info.major}.{sys.version_info.minor}/dist-packages/torchgen/packaged/ATen/native/* /usr/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/torchgen/packaged/ATen/native/

In [None]:
import sysconfig; lib_path = sysconfig.get_paths()["purelib"]
! cd /content/executorch; CMAKE_PREFIX_PATH={lib_path} EXECUTORCH_BUILD_XNNPACK=ON bash ./install_executorch.sh

In [None]:
# Run the Llama example's install_requirements.sh from inside the ExecuTorch
# checkout (the script path is relative to /content/executorch).
!cd /content/executorch; examples/models/llama/install_requirements.sh

## **Step 2: Download the DeepSeek-R1-Distill-Llama-8B model**

In [None]:
# Download the deepseek-ai/DeepSeek-R1-Distill-Llama-8B repository from
# Hugging Face into /content/models/DeepSeek-R1-Distill-Llama-8B, with
# --local-dir-use-symlinks set to False. The conversion cell in Step 3 reads
# the safetensors shards from this directory.
!huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Llama-8B --local-dir /content/models/DeepSeek-R1-Distill-Llama-8B --local-dir-use-symlinks False

## **Step 3: Export to ExecuTorch**

In [None]:
!pip install torchtune

In [None]:
import os

import torch
from torchtune.models import convert_weights
from torchtune.training import FullModelHFCheckpointer

# Input: the Hugging Face download from Step 2. Output: the converted checkpoint.
HF_CHECKPOINT_DIR = '/content/models/DeepSeek-R1-Distill-Llama-8B'
OUTPUT_DIR = '/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/'
CHECKPOINT_PATH = os.path.join(OUTPUT_DIR, "checkpoint.pth")

# Create the output directory up front so that a missing directory cannot make
# torch.save fail after the expensive checkpoint load has already completed.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Convert from safetensors to TorchTune. Assumes the model has already been
# downloaded from Hugging Face into HF_CHECKPOINT_DIR.
checkpointer = FullModelHFCheckpointer(
    checkpoint_dir=HF_CHECKPOINT_DIR,
    checkpoint_files=['model-00001-of-000002.safetensors', 'model-00002-of-000002.safetensors'],
    output_dir=OUTPUT_DIR,
    model_type='LLAMA3',  # or other types that TorchTune supports
)

print("loading checkpoint")
tune_sd = checkpointer.load_checkpoint()['model']

# Convert from TorchTune to Meta (PyTorch native) weight naming.
sd = convert_weights.tune_to_meta(tune_sd)

print("saving checkpoint")
torch.save(sd, CHECKPOINT_PATH)

In [None]:
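# Manual step: download params.json from
# https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct/blob/main/original/params.json
# and save it as /tmp/params.json. The export command in the next cell reads it via `-p /tmp/params.json`.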

In [None]:
!cd /content/executorch; python -m examples.models.llama.export_llama \
 --checkpoint /tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth \
	-p /tmp/params.json \
	-kv \
	--use_sdpa_with_kv_cache \
	-X \
	-qmode 8da4w \
	--group_size 128 \
	-d fp16 \
	--metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
	--embedding-quantize 4,32 \
	--output_name="DeepSeek-R1-Distill-Llama-8B.pte"