{ "cells": [ { "cell_type": "markdown", "id": "5093355f-b45c-483e-a8da-8753eb1536c2", "metadata": { "id": "5093355f-b45c-483e-a8da-8753eb1536c2" }, "source": [ "\n", " \"Open\n", "" ] }, { "cell_type": "markdown", "id": "xQvn0r7NYIgW", "metadata": { "id": "xQvn0r7NYIgW" }, "source": [ "## 0. Preparation:\n", "### 0-1. Add packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "cLDyOsEcYYsy", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cLDyOsEcYYsy", "outputId": "ecc210a9-5491-4bd9-d843-bf74c4381a01" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Found existing installation: torch 2.1.0+cu121\n", "Uninstalling torch-2.1.0+cu121:\n", " Successfully uninstalled torch-2.1.0+cu121\n", "Found existing installation: torchvision 0.16.0+cu121\n", "Uninstalling torchvision-0.16.0+cu121:\n", " Successfully uninstalled torchvision-0.16.0+cu121\n", "Found existing installation: torchaudio 2.1.0+cu121\n", "Uninstalling torchaudio-2.1.0+cu121:\n", " Successfully uninstalled torchaudio-2.1.0+cu121\n", "Found existing installation: torchdata 0.7.0\n", "Uninstalling torchdata-0.7.0:\n", " Successfully uninstalled torchdata-0.7.0\n", "Found existing installation: torchtext 0.16.0\n", "Uninstalling torchtext-0.16.0:\n", " Successfully uninstalled torchtext-0.16.0\n", "Found existing installation: fastai 2.7.13\n", "Uninstalling fastai-2.7.13:\n", " Successfully uninstalled fastai-2.7.13\n", "\n", "Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113\n", "Collecting torch==1.12.0+cu113\n", " Downloading https://download.pytorch.org/whl/cu113/torch-1.12.0%2Bcu113-cp310-cp310-linux_x86_64.whl (1837.6 MB)\n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 GB 975.3 kB/s eta 0:00:00\n", "Collecting torchvision==0.13.0+cu113\n", " Downloading https://download.pytorch.org/whl/cu113/torchvision-0.13.0%2Bcu113-cp310-cp310-linux_x86_64.whl (23.4 MB)\n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 
23.4/23.4 MB 43.2 MB/s eta 0:00:00\n", "Collecting torchaudio==0.12.0\n", " Downloading https://download.pytorch.org/whl/cu113/torchaudio-0.12.0%2Bcu113-cp310-cp310-linux_x86_64.whl (3.8 MB)\n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.8/3.8 MB 33.1 MB/s eta 0:00:00\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch==1.12.0+cu113) (4.5.0)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision==0.13.0+cu113) (1.23.5)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision==0.13.0+cu113) (2.31.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision==0.13.0+cu113) (9.4.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13.0+cu113) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13.0+cu113) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13.0+cu113) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13.0+cu113) (2023.11.17)\n", "Installing collected packages: torch, torchvision, torchaudio\n", "Successfully installed torch-1.12.0+cu113 torchaudio-0.12.0+cu113 torchvision-0.13.0+cu113\n", "\n", "Torch version: 1.12.0+cu113\n", "\u001b[1;32m For the 1st run, \n", "a. 
Install omegafold...\n", "Collecting git+https://github.com/HeliXonProtein/OmegaFold.git\n", " Cloning https://github.com/HeliXonProtein/OmegaFold.git to /tmp/pip-req-build-xd1_66dd\n", " Running command git clone --filter=blob:none --quiet https://github.com/HeliXonProtein/OmegaFold.git /tmp/pip-req-build-xd1_66dd\n", " Resolved https://github.com/HeliXonProtein/OmegaFold.git to commit 313c873ad190b64506a497c926649e15fcd88fcd\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting torch@ https://download.pytorch.org/whl/cu113/torch-1.12.0%2Bcu113-cp310-cp310-linux_x86_64.whl (from OmegaFold==0.0.0)\n", " Using cached https://download.pytorch.org/whl/cu113/torch-1.12.0%2Bcu113-cp310-cp310-linux_x86_64.whl (1837.6 MB)\n", "Collecting biopython (from OmegaFold==0.0.0)\n", " Downloading biopython-1.81-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from biopython->OmegaFold==0.0.0) (1.23.5)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch@ https://download.pytorch.org/whl/cu113/torch-1.12.0%2Bcu113-cp310-cp310-linux_x86_64.whl->OmegaFold==0.0.0) (4.5.0)\n", "Building wheels for collected packages: OmegaFold\n", " Building wheel for OmegaFold (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for OmegaFold: filename=OmegaFold-0.0.0-py3-none-any.whl size=55664 sha256=660238cd81e3b4bedb0e95b8d75becbc1854002b746ed0ef20425125135d223f\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-f1b09_yp/wheels/fe/57/01/3ce12996dd37debe3ee6a02e8748fffdd4df7b885c5bb8071d\n", "Successfully built OmegaFold\n", "Installing collected packages: biopython, OmegaFold\n", "Successfully installed OmegaFold-0.0.0 biopython-1.81\n", "--2023-12-17 13:39:51-- https://helixon.s3.amazonaws.com/release1.pt\n", "Resolving helixon.s3.amazonaws.com (helixon.s3.amazonaws.com)... 52.216.113.187, 52.217.86.252, 16.182.105.201, ...\n", "Connecting to helixon.s3.amazonaws.com (helixon.s3.amazonaws.com)|52.216.113.187|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 3181611124 (3.0G) [binary/octet-stream]\n", "Saving to: ‘/root/.cache/omegafold_ckpt/model.pt’\n", "\n", "/root/.cache/omegaf 100%[===================>] 2.96G 11.7MB/s in 3m 49s \n", "\n", "2023-12-17 13:43:41 (13.3 MB/s) - ‘/root/.cache/omegafold_ckpt/model.pt’ saved [3181611124/3181611124]\n", "\n", "b. 
Install DSSP...\n", "\n" ] } ], "source": [ "#@title ####For omegafold\n", "\n", "import os,sys\n", "import math\n", "\n", "#os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"-1\" #turn off CUDA if needed\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "# control the version of torch\n", "# import torch\n", "# if torch.__version__!='1.12.0+cu113':\n", "# comm_line=f'pip uninstall -y torch torchvision torchaudio torchdata torchtext fastai'\n", "# print(os.popen(comm_line).read())\n", "# comm_line=f'pip install torch==1.12.0+cu113 torchvision==0.13.0+cu113 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu113'\n", "# print(os.popen(comm_line).read())\n", "\n", "comm_line=f'pip uninstall -y torch torchvision torchaudio torchdata torchtext fastai'\n", "print(os.popen(comm_line).read())\n", "comm_line=f'pip install torch==1.12.0+cu113 torchvision==0.13.0+cu113 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu113'\n", "print(os.popen(comm_line).read())\n", "import torch\n", "print('Torch version: ', torch.__version__)\n", "\n", "path_1 = '/opt/bin/'\n", "dssp_file = path_1+'mkdssp'\n", "\n", "file_exists = os.path.exists(dssp_file)\n", "if not (file_exists):\n", " print('\\033[1;32m For the 1st run, ')\n", " # ==============================================\n", " print('a. Install omegafold...')\n", " # install omegafold\n", " # ref: https://github.com/HeliXonProtein/OmegaFold\n", " !pip install git+https://github.com/HeliXonProtein/OmegaFold.git\n", "\n", " # time-consuming step:\n", " # Downloading weights from https://helixon.s3.amazonaws.com/release1.pt to /root/.cache/omegafold_ckpt/model.pt\n", " !mkdir /root/.cache/omegafold_ckpt\n", " !wget https://helixon.s3.amazonaws.com/release1.pt -O /root/.cache/omegafold_ckpt/model.pt\n", "\n", " print('b. 
Install DSSP...')\n", " # download an mkdssp\n", " # ==============================================\n", " # download things\n", " print(os.popen(f\"wget https://www.dropbox.com/s/v4azy9z9yojg1c6/mkdssp -P {path_1}\").read())\n", " #\n", " !chmod u+x /opt/bin/mkdssp\n", "\n", "else:\n", " print('This is not the first run... ')" ] }, { "cell_type": "code", "execution_count": 2, "id": "pUhFAfNfZFsW", "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/" }, "id": "pUhFAfNfZFsW", "outputId": "3f13597c-2418-4aea-a6af-a08615d70b64" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[1;32m a. on Biopython...\n", "\u001b[1;32m b. on kornia...\n", "Collecting kornia\n", " Downloading kornia-0.7.0-py2.py3-none-any.whl (705 kB)\n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 705.7/705.7 kB 10.2 MB/s eta 0:00:00\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from kornia) (23.2)\n", "Requirement already satisfied: torch>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from kornia) (1.12.0+cu113)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.9.1->kornia) (4.5.0)\n", "Installing collected packages: kornia\n", "Successfully installed kornia-0.7.0\n", "\n", "\u001b[1;32m c. 
on einops...\n", "Collecting einops\n", " Downloading einops-0.7.0-py3-none-any.whl (44 kB)\n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 kB 1.2 MB/s eta 0:00:00\n", "Installing collected packages: einops\n", "Successfully installed einops-0.7.0\n", "\n", "Collecting einops-exts\n", " Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n", "Requirement already satisfied: einops>=0.4 in /usr/local/lib/python3.10/dist-packages (from einops-exts) (0.7.0)\n", "Installing collected packages: einops-exts\n", "Successfully installed einops-exts-0.0.4\n", "\n", "Collecting pytorch-warmup\n", " Downloading pytorch_warmup-0.1.1-py3-none-any.whl (6.6 kB)\n", "Requirement already satisfied: torch>=1.1 in /usr/local/lib/python3.10/dist-packages (from pytorch-warmup) (1.12.0+cu113)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.1->pytorch-warmup) (4.5.0)\n", "Installing collected packages: pytorch-warmup\n", "Successfully installed pytorch-warmup-0.1.1\n", "\n", "Collecting ema-pytorch\n", " Downloading ema_pytorch-0.3.1-py3-none-any.whl (4.8 kB)\n", "Collecting beartype (from ema-pytorch)\n", " Downloading beartype-0.16.4-py3-none-any.whl (819 kB)\n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 819.1/819.1 kB 13.3 MB/s eta 0:00:00\n", "Requirement already satisfied: torch>=1.6 in /usr/local/lib/python3.10/dist-packages (from ema-pytorch) (1.12.0+cu113)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.6->ema-pytorch) (4.5.0)\n", "Installing collected packages: beartype, ema-pytorch\n", "Successfully installed beartype-0.16.4 ema-pytorch-0.3.1\n", "\n", "Collecting accelerate\n", " Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n", " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 265.7/265.7 kB 4.4 MB/s eta 0:00:00\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.23.5)\n", 
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.2)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.12.0+cu113)\n", "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.19.4)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.5.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (3.13.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (2023.6.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (4.66.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) 
#@title ####For other pip packages
# Make sure every third-party package used by the notebook is importable,
# installing the missing ones with pip, then bind the names the notebook
# expects. The imports run after the install step so that the names exist on
# the very first run too (not only on a re-run after the packages are there).
import importlib
import os
import subprocess
import sys

# (progress label or None, module to probe, pip distribution name)
_REQUIRED_PACKAGES = [
    ('\033[1;32m a. on Biopython...', 'Bio.PDB', 'biopython'),
    ('\033[1;32m b. on kornia...', 'kornia.augmentation', 'kornia'),
    ('\033[1;32m c. on einops...', 'einops', 'einops'),
    (None, 'einops_exts', 'einops-exts'),
    (None, 'pytorch_warmup', 'pytorch-warmup'),
    (None, 'ema_pytorch', 'ema-pytorch'),
    (None, 'accelerate', 'accelerate'),
    (None, 'py3Dmol', 'py3Dmol'),
    # added
    (None, 'esm', 'fair-esm'),
    (None, 'torchinfo', 'torchinfo'),
]


def _ensure_package(module_name, pip_name):
    """Install `pip_name` with the kernel's pip if `module_name` cannot be imported.

    pip's combined output is printed into the cell; a failing install raises
    `subprocess.CalledProcessError` rather than passing silently.
    """
    try:
        importlib.import_module(module_name)
        return
    except ImportError:
        pass
    result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', pip_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    print(result.stdout)
    result.check_returncode()
    # let the import system notice the freshly installed distribution
    importlib.invalidate_caches()


for _label, _module_name, _pip_name in _REQUIRED_PACKAGES:
    if _label is not None:
        print(_label)
    _ensure_package(_module_name, _pip_name)

# Bind the names (same imports the original try-blocks attempted).
from Bio.PDB import PDBParser
import kornia.augmentation
from einops import rearrange, repeat, reduce
from einops_exts import rearrange_many, repeat_many, check_shape
import pytorch_warmup as warmup
from ema_pytorch import EMA
from accelerate import Accelerator, DistributedType, DistributedDataParallelKwargs
import py3Dmol
import esm
import torchinfo
#@title #### Model code
# Fetch the project sources as plain scripts (no package install) and make
# them importable.

import json, time, os, sys, glob
import subprocess

REPO_URL = "https://github.com/Bo-Ni/ProteinMechanicsDiffusionDesign_pLDM.git"
# The clone lands in the current working directory; resolve it once so the
# path put on sys.path is always the directory that was actually cloned
# (on Colab this is /content/ProteinMechanicsDiffusionDesign_pLDM).
REPO_DIR = os.path.abspath("ProteinMechanicsDiffusionDesign_pLDM")

if not os.path.isdir(REPO_DIR):
    # check=True: a failed clone raises here instead of surfacing later as an
    # unrelated ModuleNotFoundError
    subprocess.run(["git", "clone", "-q", REPO_URL, REPO_DIR], check=True)

# avoid stacking duplicate entries when the cell is re-run
if REPO_DIR not in sys.path:
    sys.path.append(REPO_DIR)


#@title #### Small test
# a silent test: importing the packages fails right away if the clone or the
# sys.path entry above is wrong
import PD_pLMProbXDiff.UtilityPack as UtilityPack
import PD_pLMProbXDiff.DataSetPack as DataSetPack
import PD_pLMProbXDiff.ModelPack as ModelPack
import PD_pLMProbXDiff.TrainerPack as TrainerPack
import PD_pLMProbXDiff.PostMDPack as PostMDPack
#@title #### Set up the saved model
# Create the working directory tree and download the pre-processed data pack,
# the model pack, one test data file and the trained model weights.
import os
import subprocess

# just script, no install is used
this_working_path = '/content/working_results/'


def _ensure_dir(path, label):
    """Create `path` if missing, printing the same progress lines as before."""
    if not os.path.isdir(path):
        print(f'Creating {label} path...')
        os.makedirs(path, exist_ok=True)
        print('Done.')
        print('Downing files...')


def _download_if_missing(url, dest_path):
    """Download `url` to `dest_path` with wget unless the file already exists.

    The URL is passed as a single argv element (no shell), so characters such
    as `&` and `?` reach wget intact. The data is written to a `.part` file
    and only renamed on success, so a failed download is retried on the next
    run instead of leaving a truncated file that looks complete.

    Raises:
        RuntimeError: if wget exits with a non-zero status.
    """
    if os.path.exists(dest_path):
        return
    partial_path = dest_path + '.part'
    result = subprocess.run(
        ['wget', '-nv', '-O', partial_path, url],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    print(result.stdout)
    if result.returncode != 0:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise RuntimeError(
            f'Download failed (wget exit code {result.returncode}): {url}'
        )
    os.replace(partial_path, dest_path)


# (sub-directory under the working path, label for the log, file name, link)
# The links use dl=1 so Dropbox serves the file itself; previously everything
# after the unquoted `&` was cut off by the shell and never reached wget.
_DOWNLOADS = [
    ('', 'working', 'model_pack.pickle',
     'https://www.dropbox.com/scl/fi/i2sull7ftjwrrzeaxo8v1/model_pack.pickle?rlkey=7wy5zynrl6m8azufklq3fy8ql&dl=1'),
    ('', 'working', 'data_pack.pickle',
     'https://www.dropbox.com/scl/fi/z7sz0q2nsjn85kyh68p86/data_pack.pickle?rlkey=bwm9fgf29ze8o516r155zg4gl&dl=1'),
    # add for dataset part
    # NOTE(review): the remote file is named *.pk but is stored locally as *.pt - confirm this is intended
    ('0_dataprocess_MD/', 'data', 'ForTest_LE_128_From_F1_f5.pt',
     'https://www.dropbox.com/scl/fi/zn2qdcdd4apsh88xirfzj/ForTest_LE_128_From_F1_f5.pk?rlkey=oamepqanqbpzrt8lrgjknp9bk&dl=1'),
    # add for model part
    ('1_model_SS/', 'model', 'trainer_save-model_pLDM.pt',
     'https://www.dropbox.com/s/09wyap14yrnoeom/trainer_save-model_pLDM.pt?dl=1'),
]

for _sub_dir, _label, _file_name, this_link in _DOWNLOADS:
    this_temp_path = this_working_path + _sub_dir
    _ensure_dir(this_temp_path, _label)
    this_file = this_temp_path + _file_name
    _download_if_missing(this_link, this_file)
ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|=========================================+======================+======================|\n", "| 0 NVIDIA A100-SXM4-40GB Off | 00000000:00:04.0 Off | 0 |\n", "| N/A 31C P0 42W / 400W | 5MiB / 40960MiB | 0% Default |\n", "| | | Disabled |\n", "+-----------------------------------------+----------------------+----------------------+\n", " \n", "+---------------------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=======================================================================================|\n", "| No running processes found |\n", "+---------------------------------------------------------------------------------------+\n", "\n", "What we have in software: \n", " Torch version: 1.12.0+cu113\n", "Python: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]\n", "\n", "What we have in software: \n", " Torch version: 1.12.0+cu113\n", "Python: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]\n", "\n", "What hardware the software see:\n", "cuda:0\n", "# of GPU 1\n", "\n" ] } ], "source": [ "#@title ### 1.0. 
#@title ### 1.0. Check the floor
# Report where the notebook runs, which GPU the machine exposes, the software
# versions, and which device torch will actually use.
import os, sys

import torch

print('Here is : \n', os.popen('pwd').read())
print('What we get in hardware: \n', os.popen('nvidia-smi').read())

# (this report used to be duplicated verbatim; it is printed once now)
print("What we have in software: \n Torch version:", torch.__version__)
print('Python: ', sys.version) # no switch case code
print()

print('What hardware the software see:')
# `device` and `num_of_gpus` are left in the notebook namespace for later cells
device = torch.device(
    "cuda:0" if torch.cuda.is_available() else "cpu"
)
print(device)
num_of_gpus = torch.cuda.device_count()
print("# of GPU", num_of_gpus)
print()
torch.cuda.empty_cache()
#@title ### 1.1. Setup the problem
# Defines the two global configuration dicts used by the rest of the notebook:
#   CKeys - control keys (run type, working mode, problem id, debug switches,
#           training schedule),
#   PKeys - path keys (working prefix and pickle locations),
# plus the input-file paths for the selected problem.
# Relies on `this_working_path` and `UtilityPack` from the earlier cells.
# prepare for package debugging
# for debug
import importlib
import json
import os
import shutil
import matplotlib.pyplot as plt
# import PD_pLMProbXDiff.UtilityPack as UtilityPack
# # run this when updating the package
# importlib.reload(UtilityPack)
# ===============================================
# Global control key setup
# ===============================================
# Control keys:
# This one will be directly modified for each task
CKeys = dict(
    #
    Running_Type=2, # 1-local:engaging cluster, 2-supercould cluster, 3-google colab, 4-local ubuntu
    #
    # Working_Mode=1, # 1-training, 2-sampling for test
    Working_Mode=2,
    #
    # IF_FirstRun=1, # 1-1st run of training; otherwise, # of training run
    IF_FirstRun=2, # 1-1st run of training; otherwise, # of training run
    #
    # 1-SecStr-ModelB, 2-MD-ModelB, 3-SecStr-ModelA, 4-MD-ModelA
    # 5-SecStr-ModelB-Embdding, 6-MD-ModelB, 7-SecStr-ModelA-pLM, 8-MD-ModelA
    # 9-MD-Predictor-ModelB, 10-
    # 11-MD-ModelB
    Problem_ID=11, # 8, # 6,
    #
    # Debug=1, # 1-debug mode on; add more debug keys for different blocks
    Debug=0, # 1-debug mode on; add more debug keys for different blocks
    #
    Debug_DataSet=1,
    Debug_Model=1
    #
    # Debug=0
)

if CKeys['Debug']==1:
    # add some
    CKeys['SlientRun']=0 # 1-save figure into files; 0-show figures
    # detailed debug keys
    # Values of the "trainer part" preset. The "model dimension" preset
    # (DataPack=1, ModelPack=1, TrainerPack=1) used to be assigned first and
    # was always overwritten by these, so only the effective values are kept.
    CKeys['Debug_DataPack']=1
    CKeys['Debug_ModelPack']=0
    CKeys['Debug_TrainerPack']=3
    # #
    # CKeys['testratio']=0.15 # for small ForcPath problem
    CKeys['testratio']=0.10 # for large ForcPath problem

else:
    # for real working run
    CKeys['SlientRun']=1
    #
    CKeys['Debug_DataPack'] = 0
    CKeys['Debug_ModelPack'] = 0
    CKeys['Debug_TrainerPack'] = 0 # 2 # 1
    # add some for training
    CKeys['epochs'] = 4000-3250 # 1000 # 200
    CKeys['print_loss_every_this_epochs']=50 # 5
    CKeys['sample_every_this_epochs']=100 # 50 # 20
    CKeys['save_model_every_this_epochs']=50 # 20
    #
    # CKeys['testratio']=0.15 # for small ForcPath problem
    CKeys['testratio']=0.10 # for large ForcPath problem

# for check
print(json.dumps(CKeys, indent=4))


# Problem type:
print('Problem type: ', CKeys['Problem_ID'])
print('Debug mode: ', CKeys['Debug'])
print('Working mode: ', CKeys['Working_Mode'])


# ===========================================================
# Parameter key:
# ===========================================================
#
PKeys = {}
# define this one according the running environment
# add more if needed
#
# NOTE: root_path already ends with 'Local_Store/'.
root_path = '/home/gridsan/bni/Test_ground/jupyter/1_git_project/sort_pdb_database_0/Local_Store/'
if CKeys['Debug']==1:
    # use the absolute path for transformability
    PKeys['prefix']=root_path+'For_20_0/'
else:
    # Earlier cluster locations, kept for reference (they were assigned and
    # then immediately overwritten by the colab path below):
    #   root_path+'For_20_1/'
    #   '/home/gridsan/bni/16_WG_git_sort_pdb_database_0/11_pLMProb_Diff_SMD_ModelB_embed_640/0_Training/'
    # add one for google colab
    PKeys['prefix']=this_working_path # '/content/11_pLMProb_Diff_SMD_ModelB_embed_640/0_Training/'

# store the data pack after processing
PKeys['pk_data_pack']=PKeys['prefix']+'data_pack.pickle'
PKeys['pk_model_pack']=PKeys['prefix']+'model_pack.pickle'

#
# clean EVERYTHING in the dir if 1st
#
if CKeys['Working_Mode']==1 and CKeys['IF_FirstRun']==1:
    if os.path.exists(PKeys['prefix']):
        cmd_line=f"rm -r {PKeys['prefix']}"
        print("clean the slade...")
        print(f"excute {cmd_line}")
        # same effect as the logged `rm -r`, without handing an unquoted
        # path to a shell
        shutil.rmtree(PKeys['prefix'])
    #
    # create dir for working space
    UtilityPack.create_path(PKeys['prefix'])
# ========================================================================
# prepare the csv files
# ========================================================================

if CKeys['Problem_ID'] in (1, 3, 5, 7):
    # Secondary-structure problems.
    # root_path already ends with 'Local_Store/', so the file name is appended
    # directly (the previous code produced '.../Local_Store/Local_Store/...').
    SS_csv_file = root_path+'PROTEIN_Mar18_2022_SECSTR_ALL.csv'

if CKeys['Problem_ID'] in (2, 4, 6, 8, 11):
    # MD problems.
    # to be copied locally
    # first debug with LE_64, to be update into LE_128
    MD_smo_csv_file = root_path+'For_1/ForTrain_recon_BSDB_LE_64_smd_disp_forc_df_smo.csv'
    # into LE_128 (the LE_64 '..._smo_shared.pk' value was overwritten by this one)
    MD_smo_pk_file = root_path+'For_1/ForTrain_recon_BSDB_LE_128_smd_disp_forc_df_smo.pk'
'tokenizer_X': None, 'tokenizer_y': None, 'Xnormfac': 750.0, 'ynormfac': 1.0, 'batch_size': 256, 'testset_ratio': 0.15, 'maxdata': 99999999991000, 'ESM-2_Model': 'esm2_t30_150M_UR50D', 'image_channels': 33}\n", "/content/working_results/\n" ] } ], "source": [
"#@title ### 1.2. Handle the dataset\n",
"\n",
"import pickle\n",
"import numpy as np\n",
"import pandas as pd\n",
"# import PD_pLMProbXDiff.DataSetPack as DataSetPack\n",
"# importlib.reload(DataSetPack)\n",
"\n",
"print('On Problem: ', CKeys['Problem_ID'])\n",
"print(this_working_path)\n",
"\n",
"# ====================================================\n",
"# per-problem settings (all parameters are conveyed via DataKeys)\n",
"# ====================================================\n",
"# Problems that take an MD force path as input and output an AA sequence.\n",
"# Problem_ID -> (ESM-2 model name, image_channels)\n",
"_FORCE_PATH_PROBLEMS = {\n",
"    6: ('esm2_t12_35M_UR50D', 480),\n",
"    # only use the probability part\n",
"    11: ('esm2_t30_150M_UR50D', 33),\n",
"}\n",
"# Problem that takes a text condition (peak force, toughness) as input.\n",
"_TEXT_COND_PROBLEM = 8\n",
"# NOTE: Problem_IDs 1-5 and 7 are disabled in this notebook. Their branches\n",
"# were only `pass` plus commented-out settings, so picking one of them ended\n",
"# in a NameError on `train_loader`; they are now rejected explicitly below.\n",
"\n",
"\n",
"def _prepare_md_frames(prefix, max_text_len=None):\n",
"    \"\"\"Screen the MD records and store the resulting dataframes.\n",
"\n",
"    Creates the data folder `prefix + '0_dataprocess_MD/'`, fills the\n",
"    screening rules, calls DataSetPack.screen_dataset_MD on MD_smo_pk_file\n",
"    and pickles protein_df / df_raw into the data folder.\n",
"\n",
"    max_text_len is only written into the keys when it is given.\n",
"    Returns (data_keys, df_raw, protein_df).\n",
"    \"\"\"\n",
"    data_keys = {}\n",
"    # keys for 'screen_dataset_MD'\n",
"    data_keys['data_dir'] = prefix + '0_dataprocess_MD/'\n",
"    # add the folder\n",
"    UtilityPack.create_path(data_keys['data_dir'])\n",
"    # screening rules\n",
"    data_keys['min_AA_seq_len'] = 0\n",
"    data_keys['max_AA_seq_len'] = 128\n",
"    if max_text_len is not None:\n",
"        data_keys['max_text_len'] = max_text_len\n",
"    data_keys['max_Force_cap'] = 1000\n",
"\n",
"    df_raw, protein_df = DataSetPack.screen_dataset_MD(\n",
"        csv_file=None,\n",
"        pk_file=MD_smo_pk_file,\n",
"        PKeys=data_keys,  # to be updated\n",
"        CKeys=CKeys,\n",
"    )\n",
"\n",
"    # save the dataframes\n",
"    pd.to_pickle(protein_df, data_keys['data_dir'] + 'protein_df.pk')\n",
"    pd.to_pickle(df_raw, data_keys['data_dir'] + 'df_raw.pk')\n",
"    return data_keys, df_raw, protein_df\n",
"\n",
"\n",
"# data washing and dataset building only happen in the 1st training cycle\n",
"_first_run = CKeys['Working_Mode']==1 and CKeys['IF_FirstRun']==1\n",
"_problem_id = CKeys['Problem_ID']\n",
"\n",
"# ====================================================\n",
"# add the keys for the dataset\n",
"# ====================================================\n",
"if _first_run:\n",
"\n",
"    if _problem_id in _FORCE_PATH_PROBLEMS:\n",
"        print(f'{_problem_id}, input MD ForcePath, output AA sequence')\n",
"        DataKeys, df_raw, protein_df = _prepare_md_frames(PKeys['prefix'])\n",
"        # keys for the dataset builder\n",
"        DataKeys['X_Key'] = 'sample_FORCEpN_data'  # or 'Max_Smo_Force'\n",
"        DataKeys['tokenizer_X'] = None  # will not be used\n",
"        DataKeys['tokenizer_y'] = None  # to be created\n",
"        # fixed factor; alternative: np.max(protein_df['Max_Smo_Force'])\n",
"        DataKeys['Xnormfac'] = 750.\n",
"        print('Normalization factor for force: ', DataKeys['Xnormfac'])\n",
"        DataKeys['ynormfac'] = 1.  # not used as esm is used\n",
"        DataKeys['batch_size'] = 256\n",
"        DataKeys['testset_ratio'] = 0.15\n",
"        DataKeys['maxdata'] = 99999999991000\n",
"        # for AA embedding using ESM\n",
"        DataKeys['ESM-2_Model'], DataKeys['image_channels'] = _FORCE_PATH_PROBLEMS[_problem_id]\n",
"        _load_data_set = DataSetPack.load_data_set_from_df_SMD_pLM\n",
"\n",
"    elif _problem_id == _TEXT_COND_PROBLEM:\n",
"        print('8: input text condition, output sequence...')\n",
"        DataKeys, df_raw, protein_df = _prepare_md_frames(PKeys['prefix'], max_text_len=2)\n",
"        # keys for the dataset builder\n",
"        DataKeys['X_Key'] = ['Max_Smo_Force', 'Int_Smo_ForcPull']\n",
"        DataKeys['tokenizer_X'] = None  # will not be used\n",
"        DataKeys['tokenizer_y'] = None  # will not be used\n",
"        _max_force = np.max(protein_df['Max_Smo_Force'])\n",
"        _max_toughness = np.max(protein_df['Int_Smo_ForcPull'])\n",
"        print('Normalization factor for force: ', _max_force)\n",
"        print('Normalization factor for toughness: ', _max_toughness)\n",
"        DataKeys['Xnormfac'] = np.array([_max_force, _max_toughness])\n",
"        DataKeys['ynormfac'] = 1.0  # not used in esm\n",
"        DataKeys['batch_size'] = 256\n",
"        DataKeys['testset_ratio'] = CKeys['testratio']\n",
"        DataKeys['maxdata'] = 99999999991000\n",
"        # for AA embedding using ESM\n",
"        DataKeys['ESM-2_Model'] = 'esm2_t33_650M_UR50D'\n",
"        # add for embedding space\n",
"        DataKeys['image_channels'] = 1280\n",
"        _load_data_set = DataSetPack.load_data_set_text2seq_MD_ModelA_pLM\n",
"\n",
"    else:\n",
"        # fail here with a clear message instead of a later NameError\n",
"        raise NotImplementedError(\n",
"            f'Problem_ID {_problem_id} has no first-run data pipeline in this notebook; '\n",
"            f'supported IDs: {sorted(_FORCE_PATH_PROBLEMS) + [_TEXT_COND_PROBLEM]}'\n",
"        )\n",
"\n",
"print(CKeys)\n",
"\n",
"\n",
"# ====================================================\n",
"# convert into datasets\n",
"# ====================================================\n",
"if _first_run:\n",
"    train_loader, train_loader_noshuffle, test_loader, tokenizer_y, tokenizer_X = _load_data_set(\n",
"        protein_df,\n",
"        PKeys=DataKeys,  # to be updated\n",
"        CKeys=CKeys,\n",
"    )\n",
"\n",
"    print('==========================================')\n",
"    print('Save the datasets ...')\n",
"    print('==========================================')\n",
"    # save the dataset for the 1st run\n",
"    data_pack = {\n",
"        'train_loader': train_loader,\n",
"        'train_loader_noshuffle': train_loader_noshuffle,\n",
"        'test_loader': test_loader,\n",
"        'tokenizer_X': tokenizer_X,\n",
"        'tokenizer_y': tokenizer_y,\n",
"        # keys\n",
"        'DataKeys': DataKeys,\n",
"        'PKeys': PKeys,\n",
"    }\n",
"    with open(PKeys['pk_data_pack'], 'wb') as handle:\n",
"        pickle.dump(data_pack, handle, protocol=pickle.HIGHEST_PROTOCOL)\n",
"\n",
"else:  # work both for training and testing\n",
"\n",
"    print('This is not the first run')\n",
"    print('Load back in the data packages...')\n",
"    # NOTE: pickle.load can execute arbitrary code; only load a data pack\n",
"    # that was written by this notebook or comes from a trusted source.\n",
"    with open(PKeys['pk_data_pack'], 'rb') as handle:\n",
"        data_pack = pickle.load(handle)\n",
"    # deliver the results\n",
"    train_loader = data_pack['train_loader']\n",
"    train_loader_noshuffle = data_pack['train_loader_noshuffle']\n",
"    test_loader = data_pack['test_loader']\n",
"    tokenizer_X = data_pack['tokenizer_X']\n",
"    tokenizer_y = data_pack['tokenizer_y']\n",
"    # keys (create or update)\n",
"    DataKeys = data_pack['DataKeys']\n",
"    # for colab: the stored paths point to the machine that wrote the pack,\n",
"    # so redirect them to the current working path\n",
"    DataKeys['data_dir'] = this_working_path+'0_dataprocess_MD/'\n",
"    PKeys = data_pack['PKeys']\n",
"    PKeys['prefix'] = this_working_path\n",
"    PKeys['pk_data_pack'] = this_working_path+'data_pack.pickle'\n",
"    PKeys['pk_model_pack'] = this_working_path+'model_pack.pickle'\n",
"    # protein_df / df_raw are not reloaded here (skipped on purpose)\n",
"    print('Done.')\n",
"\n",
"\n",
"print(DataKeys)\n",
"print(this_working_path)"
] }, { "cell_type": "code", "execution_count": 9, "id": "TwXmEpWhjWcI", "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/" }, "id": "TwXmEpWhjWcI",
"outputId": "16b3a0af-a41e-4157-c384-ce6ab1a76770" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "tokenizer_X: None\n", "tokenizer_y: None\n", "Xnormfac: 750.0\n", "ynormfac: 1.0\n" ] } ], "source": [
"#@title ### 1.2.1 Prepare for de novo conditions\n",
"\n",
"# add a block to handle the De Novo force shape\n",
"# idea: pick two records to mix them\n",
"\n",
"\n",
"def _plot_max_min_force_records(protein_df, out_dir, silent):\n",
"    \"\"\"Plot the records with the largest and the smallest Max_Smo_Force.\n",
"\n",
"    For both records the curve (sample_NormPullGap_data vs. sample_FORCE_data)\n",
"    and the point (NPullGap_for_MaxSmoF, Max_Smo_Force) are drawn. The figure\n",
"    is saved into out_dir when silent is true and shown otherwise.\n",
"\n",
"    Returns the index labels (max_id, min_id) of the two picked records.\n",
"    \"\"\"\n",
"    print( protein_df.keys() )\n",
"    # pick the Max_Smo_Force max and min\n",
"    max_id = protein_df['Max_Smo_Force'].idxmax()\n",
"    min_id = protein_df['Max_Smo_Force'].idxmin()\n",
"    print(f'Max Peak Force id: {max_id}')\n",
"    print(f'Min Peak Force id: {min_id}')\n",
"\n",
"    # one figure only: an extra plt.figure(...) call in front of subplots()\n",
"    # just leaves an empty figure behind that is never closed\n",
"    fig, ax0 = plt.subplots()\n",
"    for rec_id in (max_id, min_id):\n",
"        ax0.plot(\n",
"            protein_df['sample_NormPullGap_data'][rec_id],\n",
"            protein_df['sample_FORCE_data'][rec_id],\n",
"            alpha=0.1,\n",
"        )\n",
"        ax0.scatter(\n",
"            protein_df['NPullGap_for_MaxSmoF'][rec_id],\n",
"            protein_df['Max_Smo_Force'][rec_id],\n",
"        )\n",
"    ax0.set_xlabel('Normalized distance btw pulling ends')\n",
"    # NOTE(review): 'pF' looks like a typo for 'pN' -- confirm the unit\n",
"    ax0.set_ylabel('Force (pF)')\n",
"    if silent:\n",
"        fig.savefig(out_dir+'CSV_6_PickMaxMinF_SMD_sim_Dist.jpg', dpi=200)\n",
"    else:\n",
"        plt.show()\n",
"    plt.close(fig)\n",
"    return max_id, min_id\n",
"\n",
"\n",
"# Problems 2 and 6 share the same plot; problems 5 and 11 need nothing here.\n",
"if CKeys['Working_Mode']==1 and CKeys['Problem_ID'] in (2, 6):\n",
"    if 'protein_df' in globals():\n",
"        pick_id_0, pick_id_1 = _plot_max_min_force_records(\n",
"            protein_df,\n",
"            out_dir=DataKeys['data_dir'],\n",
"            silent=(CKeys['SlientRun']==1),\n",
"        )\n",
"    else:\n",
"        # protein_df is only built during the first run and is not reloaded\n",
"        # with the data pack, so skip instead of failing with a NameError\n",
"        print('protein_df is not available in this session; skip the max/min force plot.')\n",
"\n",
"# a quick check\n",
"print('tokenizer_X: ', tokenizer_X)\n",
"print('tokenizer_y: ', tokenizer_y)\n",
"print('Xnormfac: ', DataKeys['Xnormfac'])\n",
"print('ynormfac: ', DataKeys['ynormfac'])\n",
"\n",
"if CKeys['Debug']==1 and CKeys['Debug_DataPack']==1:\n",
"    # unpack to check\n",
"    print('Len of train loader:', len(train_loader))\n",
"    print('Len of test loader:', len(test_loader))\n",
"    # only the first batch is inspected, so do not walk the whole loader\n",
"    this_item = next(iter(test_loader), None)\n",
"    if this_item is None:\n",
"        print('test_loader is empty; nothing to inspect.')\n",
"    else:\n",
"        print('Len of 1st batch item: ', len(this_item))\n",
"        # on mini-batch\n",
"        print('Batch size: ', DataKeys['batch_size'])\n",
"        print('Seq len: ', DataKeys['max_AA_seq_len'])\n",
"        print('X.dim: ', this_item[0].shape) # Condition: (Batch, Condi)\n",
"        print('y_data.dim: ', this_item[1].shape) # AASequenc: (Batch, AACode)\n",
"        print(this_item[0][0,:])\n",
"\n",
"        if CKeys['Problem_ID']==8 or CKeys['Problem_ID']==7:\n",
"            # the same condition, as stored and scaled by Xnormfac\n",
"            print (this_item[0][0,:])\n",
"            print (this_item[0][0,:]*DataKeys['Xnormfac'])"
] }, { "cell_type": "code", "execution_count": 10, "id": "4lE_XT0bjtAV", "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/" }, "id": "4lE_XT0bjtAV", "outputId": "c6f59197-a917-4549-aa51-bf72739defd1" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "==================================================\n", "load in the MODEL key from the previous storage\n", "==================================================\n", "This is not the
first run\n", "Load back in the model packages...\n", "Done.\n", "128\n", "33\n", "33\n", "33\n", "{\n", " \"dim\": 256,\n", " \"text_embed_dim\": 544,\n", " \"num_resnet_blocks\": 1,\n", " \"cond_dim\": 512,\n", " \"num_image_tokens\": null,\n", " \"num_time_tokens\": null,\n", " \"learned_sinu_pos_emb_dim\": null,\n", " \"out_dim\": null,\n", " \"dim_mults\": [\n", " 1,\n", " 2,\n", " 4,\n", " 8\n", " ],\n", " \"cond_images_channels\": 33,\n", " \"channels\": 33,\n", " \"channels_out\": 33,\n", " \"attn_dim_head\": 64,\n", " \"attn_heads\": 8,\n", " \"ff_mult\": 2.0,\n", " \"lowres_cond\": false,\n", " \"layer_attns\": [\n", " false,\n", " true,\n", " true,\n", " false\n", " ],\n", " \"layer_attns_depth\": 1,\n", " \"layer_attns_add_text_cond\": true,\n", " \"attend_at_middle\": true,\n", " \"layer_cross_attns\": [\n", " false,\n", " true,\n", " true,\n", " false\n", " ],\n", " \"use_linear_attn\": false,\n", " \"use_linear_cross_attn\": false,\n", " \"cond_on_text\": true,\n", " \"max_text_len\": 128,\n", " \"init_dim\": null,\n", " \"resnet_groups\": 8,\n", " \"init_conv_kernel_size\": 7,\n", " \"init_cross_embed\": false,\n", " \"init_cross_embed_kernel_sizes\": [\n", " 3,\n", " 7,\n", " 15\n", " ],\n", " \"cross_embed_downsample\": false,\n", " \"cross_embed_downsample_kernel_sizes\": [\n", " 2,\n", " 4\n", " ],\n", " \"attn_pool_text\": true,\n", " \"attn_pool_num_latents\": 32,\n", " \"dropout\": 0.0,\n", " \"memory_efficient\": false,\n", " \"init_conv_to_final_conv_residual\": false,\n", " \"use_global_context_attn\": true,\n", " \"scale_skip_connection\": true,\n", " \"final_resnet_block\": true,\n", " \"final_conv_kernel_size\": 3,\n", " \"cosine_sim_attn\": true,\n", " \"self_cond\": false,\n", " \"combine_upsample_fmaps\": true,\n", " \"pixel_shuffle_upsample\": false,\n", " \"beginning_and_final_conv_present\": null\n", "}\n", "{'timesteps': (96,), 'dim': 256, 'pred_dim': 33, 'loss_type': 0, 'elucidated': True, 'padding_idx': 0, 'cond_dim': None, 
'text_embed_dim': None, 'input_tokens': None, 'sequence_embed': None, 'embed_dim_position': None, 'max_text_len': 128, 'cond_images_channels': 33, 'max_length': 128, 'device': device(type='cuda', index=0)}\n", "33\n", "33\n", "33\n", "33\n", "33\n", "\n", "256\n", "256\n", "--------------------------------------------\n", "On OneD_Unet...\n", "--------------------------------------------\n", "256 33\n", "Working unet part model: \n", "Total parameters: 264376242 trainable parameters: 264376242\n", "--------------------------------------------\n", "On whole model...\n", "--------------------------------------------\n", "Model B: Generative protein diffusion model, residue-based\n", "Using condition as the initial sequence\n", "Use conditioning image during training....\n", "Loss type: 0\n", "Channels in=33, channels out=33\n", "Test on cast_model_parameters...\n", "False\n", "False\n", "None\n", "33\n", "33\n", "256 33\n", "cpu\n", "Total working model: \n", "Total parameters: 262527570 trainable parameters: 262527570\n", "Recasted unet inside the tot one only: \n", "Total parameters: 262521266 trainable parameters: 262521266\n", "--------------------------------------------\n", "On trainer...\n", "--------------------------------------------\n" ] } ], "source": [ "#@title ### 1.3 Model building\n", "\n", "# import PD_pLMProbXDiff.ModelPack as ModelPack\n", "# #\n", "# import PD_pLMProbXDiff.TrainerPack as TrainerPack\n", "# #\n", "# importlib.reload(ModelPack)\n", "# importlib.reload(TrainerPack)\n", "\n", "if CKeys['Working_Mode']==1 and CKeys['IF_FirstRun']==1:\n", " # this is a trining mode\n", " if CKeys['Problem_ID']==1:\n", " pass\n", "# # this is 1st run....\n", "# # +++++++++++++++++++++++++++++++++++++\n", "# # SecStr as input seq\n", "# ModelKeys={}\n", "# # storage\n", "# ModelKeys['model_dir']=PKey['prefix']+'1_model_SS/'\n", "# # create the folder\n", "# print(\"Creating the model dir...\")\n", "# UtilityPack.create_path(ModelKeys['model_dir'])\n", "# 
# secondary folders\n", "# ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", "# ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", "# # for UNet\n", "# ModelKeys['dim']=256 # dim for UNet\n", "# ModelKeys['text_embed_dim'] = 512\n", "# ModelKeys['embed_dim_position']=32\n", "# ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", "# ModelKeys['cond_dim'] = 512\n", "# ModelKeys['cond_images_channels']=1\n", "# ModelKeys['max_text_len']=DataKeys['max_AA_seq_len'] # this is about text condi\n", "# # for Imagen\n", "# ModelKeys['pred_dim']=1 # for sequence, =1\n", "# ModelKeys['diff_timesteps']=(96,)\n", "# ModelKeys['loss_type']=0 # MSE\n", "# ModelKeys['elucidated']=True #\n", "# ModelKeys['padding_idx']=0\n", "# ModelKeys['max_length']=DataKeys['max_AA_seq_len'] # max seq len\n", "# ModelKeys['device']=device\n", "# #\n", "# # buckets for Model building\n", "# ModelKeys['UNet']={}\n", "# ModelKeys['Imagen']={}\n", "# # ++++++++++++++++++++++++++++++++++++++++++++++++++\n", "# write_PK_UNet=dict()\n", "# write_PK_UNet['dim']=ModelKeys['dim'] # 256\n", "# write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim'] # 512+32\n", "# write_PK_UNet['num_resnet_blocks']=1\n", "# write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", "# # write_PK_UNet['num_image_tokens']=None # using the default value\n", "# # write_PK_UNet['num_time_tokens']=None # using the default\n", "# # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", "# # write_PK_UNet['out_dim']=None\n", "# write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", "# write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_UNet['channels']=1\n", "# write_PK_UNet['channels_out']=1\n", "\n", "# write_PK_UNet['attn_dim_head']=64\n", "# 
write_PK_UNet['attn_heads']=8\n", "# write_PK_UNet['ff_mult']=2.\n", "# write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", "# write_PK_UNet['layer_attns']=(False, True, True, False)\n", "# write_PK_UNet['layer_attns_depth']=1\n", "# write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as described in Appendix D.3.1\n", "# write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", "# write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", "# write_PK_UNet['use_linear_attn']=False\n", "# write_PK_UNet['use_linear_cross_attn']=False\n", "\n", "# write_PK_UNet['cond_on_text']=True\n", "# write_PK_UNet['max_text_len']=ModelKeys['max_text_len'] # need to check this one\n", "# # ModelKeys['UNet']['init_dim']=None\n", "# write_PK_UNet['resnet_groups']=8\n", "# write_PK_UNet['init_conv_kernel_size']=7 # kernel size of initial conv, if not using cross embed\n", "# write_PK_UNet['init_cross_embed']=False #TODO - fix ouput size calcs for conv1d\n", "# write_PK_UNet['init_cross_embed_kernel_sizes']=(3, 7, 15)\n", "# write_PK_UNet['cross_embed_downsample']=False\n", "# write_PK_UNet['cross_embed_downsample_kernel_sizes']=(2, 4)\n", "\n", "# write_PK_UNet['attn_pool_text']=True\n", "# write_PK_UNet['attn_pool_num_latents']=32 #perceiver model latents\n", "# write_PK_UNet['dropout']=0.\n", "# write_PK_UNet['memory_efficient']=False\n", "# write_PK_UNet['init_conv_to_final_conv_residual']=False\n", "\n", "# write_PK_UNet['use_global_context_attn']=True\n", "# write_PK_UNet['scale_skip_connection']=True\n", "# write_PK_UNet['final_resnet_block']=True\n", "# write_PK_UNet['final_conv_kernel_size']=3\n", "\n", "# write_PK_UNet['cosine_sim_attn']=True\n", "# write_PK_UNet['self_cond']=False\n", "# 
write_PK_UNet['combine_upsample_fmaps']=True # combine feature maps from all upsample blocks, used in unet squared successfully\n", "# write_PK_UNet['pixel_shuffle_upsample']=False # may address checkboard artifacts\n", "# # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", "# ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", "# # +++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "# # beyond UNet, for Whole model, all keys\n", "# # \"None\" means defult value on function definition\n", "# write_PK_Imagen=dict()\n", "\n", "# write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", "# write_PK_Imagen['dim']=ModelKeys['dim']\n", "# write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", "# write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", "# write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", "# write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", "# # write_PK_Imagen['cond_dim']=None # use default\n", "# # write_PK_Imagen['text_embed_dim']=None\n", "# # write_PK_Imagen['input_tokens']=None\n", "# # write_PK_Imagen['sequence_embed']=None\n", "# # write_PK_Imagen['embed_dim_position']=None\n", "# write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", "# write_PK_Imagen['cond_images_channels']=ModelKeys['pred_dim']\n", "# write_PK_Imagen['max_length']=ModelKeys['max_length']\n", "# write_PK_Imagen['device']=ModelKeys['device']\n", "\n", "# # extend it to a full key\n", "# ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", "\n", " elif CKeys['Problem_ID']==2:\n", " pass\n", "# # this is 1st run....\n", "# # +++++++++++++++++++++++++++++++++++++\n", "# # SecStr as input seq\n", "# ModelKeys={}\n", "# # storage\n", "# ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", "# # create the folder\n", "# print(\"Creating the model dir...\")\n", "# UtilityPack.create_path(ModelKeys['model_dir'])\n", "# # secondary folders\n", "# 
ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", "# ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", "# # for UNet\n", "# ModelKeys['dim']=256 # dim for UNet\n", "# ModelKeys['text_embed_dim'] = 512\n", "# ModelKeys['embed_dim_position']=32\n", "# ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", "# ModelKeys['cond_dim'] = 512\n", "# ModelKeys['cond_images_channels']=1\n", "# ModelKeys['max_text_len']=DataKeys['max_AA_seq_len'] # this is about text condi\n", "# # for Imagen\n", "# ModelKeys['pred_dim']=1 # for sequence, =1\n", "# ModelKeys['diff_timesteps']=(96,)\n", "# ModelKeys['loss_type']=0 # MSE\n", "# ModelKeys['elucidated']=True #\n", "# ModelKeys['padding_idx']=0\n", "# ModelKeys['max_length']=DataKeys['max_AA_seq_len'] # max seq len\n", "# ModelKeys['device']=device\n", "# #\n", "# # buckets for Model building\n", "# ModelKeys['UNet']={}\n", "# ModelKeys['Imagen']={}\n", "# # ++++++++++++++++++++++++++++++++++++++++++++++++++\n", "# write_PK_UNet=dict()\n", "# write_PK_UNet['dim']=ModelKeys['dim'] # 256\n", "# write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim'] # 512+32\n", "# write_PK_UNet['num_resnet_blocks']=1\n", "# write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", "# # write_PK_UNet['num_image_tokens']=None # using the default value\n", "# # write_PK_UNet['num_time_tokens']=None # using the default\n", "# # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", "# # write_PK_UNet['out_dim']=None\n", "# write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", "# write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_UNet['channels']=1\n", "# write_PK_UNet['channels_out']=1\n", "\n", "# write_PK_UNet['attn_dim_head']=64\n", "# 
write_PK_UNet['attn_heads']=8\n", "# write_PK_UNet['ff_mult']=2.\n", "# write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", "# write_PK_UNet['layer_attns']=(False, True, True, False)\n", "# write_PK_UNet['layer_attns_depth']=1\n", "# write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as described in Appendix D.3.1\n", "# write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", "# write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", "# write_PK_UNet['use_linear_attn']=False\n", "# write_PK_UNet['use_linear_cross_attn']=False\n", "\n", "# write_PK_UNet['cond_on_text']=True\n", "# write_PK_UNet['max_text_len']=ModelKeys['max_text_len'] # need to check this one\n", "# # ModelKeys['UNet']['init_dim']=None\n", "# write_PK_UNet['resnet_groups']=8\n", "# write_PK_UNet['init_conv_kernel_size']=7 # kernel size of initial conv, if not using cross embed\n", "# write_PK_UNet['init_cross_embed']=False #TODO - fix ouput size calcs for conv1d\n", "# write_PK_UNet['init_cross_embed_kernel_sizes']=(3, 7, 15)\n", "# write_PK_UNet['cross_embed_downsample']=False\n", "# write_PK_UNet['cross_embed_downsample_kernel_sizes']=(2, 4)\n", "\n", "# write_PK_UNet['attn_pool_text']=True\n", "# write_PK_UNet['attn_pool_num_latents']=32 #perceiver model latents\n", "# write_PK_UNet['dropout']=0.\n", "# write_PK_UNet['memory_efficient']=False\n", "# write_PK_UNet['init_conv_to_final_conv_residual']=False\n", "\n", "# write_PK_UNet['use_global_context_attn']=True\n", "# write_PK_UNet['scale_skip_connection']=True\n", "# write_PK_UNet['final_resnet_block']=True\n", "# write_PK_UNet['final_conv_kernel_size']=3\n", "\n", "# write_PK_UNet['cosine_sim_attn']=True\n", "# write_PK_UNet['self_cond']=False\n", "# 
write_PK_UNet['combine_upsample_fmaps']=True # combine feature maps from all upsample blocks, used in unet squared successfully\n", "# write_PK_UNet['pixel_shuffle_upsample']=False # may address checkboard artifacts\n", "# # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", "# ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", "# # +++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "# # beyond UNet, for Whole model, all keys\n", "# # \"None\" means defult value on function definition\n", "# write_PK_Imagen=dict()\n", "\n", "# write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", "# write_PK_Imagen['dim']=ModelKeys['dim']\n", "# write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", "# write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", "# write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", "# write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", "# # write_PK_Imagen['cond_dim']=None # use default\n", "# # write_PK_Imagen['text_embed_dim']=None\n", "# # write_PK_Imagen['input_tokens']=None\n", "# # write_PK_Imagen['sequence_embed']=None\n", "# # write_PK_Imagen['embed_dim_position']=None\n", "# write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", "# write_PK_Imagen['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_Imagen['max_length']=ModelKeys['max_length']\n", "# write_PK_Imagen['device']=ModelKeys['device']\n", "\n", "# # extend it to a full key\n", "# ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", "# # to be defined\n", "\n", " elif CKeys['Problem_ID']==3:\n", " pass\n", "\n", "# ModelKeys={}\n", "# # storage\n", "# ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", "# # create the folder\n", "# print(\"Creating the model dir...\")\n", "# UtilityPack.create_path(ModelKeys['model_dir'])\n", "# # secondary folders\n", "# ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", "# 
UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", "# ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", "# # for UNet\n", "# ModelKeys['dim']=768 # dim for UNet\n", "# # for debug\n", "# ModelKeys['dim']=256 # dim for UNet: GPU stat: 256:16093MiB;\n", "# #\n", "# ModelKeys['text_embed_dim']=512-128\n", "# ModelKeys['embed_dim_position']=128\n", "# # ! this is for UNet, not for Imagen\n", "# ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", "# ModelKeys['cond_dim']=512\n", "# # ModelKeys['embed_dim_position']=128\n", "# # ModelKeys['text_embed_dim']=ModelKeys['cond_dim']-ModelKeys['embed_dim_position']\n", "# # !!! Need to check this one = 1 or not\n", "# ModelKeys['cond_images_channels']=0 # 1\n", "# # !!! Need to check this one: should it be 8 or 64?\n", "# ModelKeys['max_text_len']= DataKeys['max_text_len'] # DataKeys['max_AA_seq_len'] # 8 # this is about text condi\n", "\n", "# # for Imagen\n", "# ModelKeys['pred_dim']=1 # for sequence, =1\n", "# ModelKeys['diff_timesteps']=(96) # (96,)\n", "# ModelKeys['loss_type']=0 # MSE\n", "# ModelKeys['elucidated']=True #\n", "# ModelKeys['padding_idx']=0\n", "# ModelKeys['max_length']=DataKeys['max_AA_seq_len']\n", "# ModelKeys['device']=device\n", "# #\n", "# # baskets for Model building\n", "# ModelKeys['UNet']={}\n", "# ModelKeys['Imagen']={}\n", "# # ++++++++++++++++++++++++++++++++++++++\n", "# # prepare the Unet Key\n", "# write_PK_UNet=dict()\n", "# # used ones\n", "# write_PK_UNet['dim']=ModelKeys['dim']\n", "# write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim']\n", "# write_PK_UNet['num_resnet_blocks']=1\n", "# write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", "# # write_PK_UNet['num_image_tokens']=None # using the default value\n", "# # write_PK_UNet['num_time_tokens']=None # using the default\n", "# # 
write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", "# # write_PK_UNet['out_dim']=None\n", "# write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", "# write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_UNet['channels']=ModelKeys['pred_dim']\n", "# write_PK_UNet['channels_out']=ModelKeys['pred_dim']\n", "\n", "# write_PK_UNet['attn_dim_head']=64\n", "# write_PK_UNet['attn_heads']=8\n", "# write_PK_UNet['ff_mult']=2.\n", "# write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", "\n", "# write_PK_UNet['layer_attns']=(False, True, True, False)\n", "# write_PK_UNet['layer_attns_depth']=1\n", "# write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as described in Appendix D.3.1\n", "# write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", "# write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", "# write_PK_UNet['use_linear_attn']=False\n", "# write_PK_UNet['use_linear_cross_attn']=False\n", "\n", "# write_PK_UNet['cond_on_text'] = True\n", "# # !!! 
Need to check this: 63, Imagen used a different one\n", "# write_PK_UNet['max_text_len'] = DataKeys['max_AA_seq_len'] # ModelKeys['max_text_len']\n", "# # write_PK_UNet['init_dim'] = None\n", "# write_PK_UNet['resnet_groups'] = 8\n", "# write_PK_UNet['init_conv_kernel_size'] =7 # kernel size of initial conv, if not using cross embed\n", "# write_PK_UNet['init_cross_embed'] = False #TODO - fix ouput size calcs for conv1d\n", "# write_PK_UNet['init_cross_embed_kernel_sizes'] = (3, 7, 15)\n", "# write_PK_UNet['cross_embed_downsample'] = False\n", "# write_PK_UNet['cross_embed_downsample_kernel_sizes'] = (2, 4)\n", "\n", "# write_PK_UNet['attn_pool_text'] = True\n", "# write_PK_UNet['attn_pool_num_latents'] = 32 #32, #perceiver model latents\n", "# write_PK_UNet['dropout'] = 0.\n", "# write_PK_UNet['memory_efficient'] = False\n", "# write_PK_UNet['init_conv_to_final_conv_residual'] = False\n", "\n", "# write_PK_UNet['use_global_context_attn'] = True\n", "# write_PK_UNet['scale_skip_connection'] = True\n", "# write_PK_UNet['final_resnet_block'] = True\n", "# write_PK_UNet['final_conv_kernel_size'] = 3\n", "\n", "# write_PK_UNet['cosine_sim_attn'] = True\n", "# write_PK_UNet['self_cond'] = False\n", "# write_PK_UNet['combine_upsample_fmaps'] = True # combine feature maps from all upsample blocks, used in unet squared successfully\n", "# write_PK_UNet['pixel_shuffle_upsample'] = False # may address checkboard artifacts\n", "# # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", "# #\n", "# ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", "# # ++++++++++++++++++++++++++++++++++++++\n", "# # beyond UNet, for Whole model, all keys\n", "# # \"None\" means defult value on function definition\n", "# write_PK_Imagen=dict()\n", "\n", "# # used ones\n", "# write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", "# write_PK_Imagen['dim']=ModelKeys['dim']\n", "# write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", "# 
write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", "# write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", "# write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", "# write_PK_Imagen['cond_dim']=ModelKeys['cond_dim'] # use default\n", "# # can use default, 512; or like the below (check the code)\n", "# write_PK_Imagen['text_embed_dim']=ModelKeys['text_embed_dim']-ModelKeys['embed_dim_position']\n", "# # write_PK_Imagen['input_tokens']=None\n", "# # write_PK_Imagen['sequence_embed']=None\n", "# write_PK_Imagen['embed_dim_position']=ModelKeys['embed_dim_position']\n", "# write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", "# write_PK_Imagen['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_Imagen['max_length']=ModelKeys['max_length']\n", "# write_PK_Imagen['device']=ModelKeys['device']\n", "\n", "# # extend it to a full key\n", "# ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", "\n", " elif CKeys['Problem_ID']==4:\n", " pass\n", "\n", "# ModelKeys={}\n", "# # storage\n", "# ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", "# # create the folder\n", "# print(\"Creating the model dir...\")\n", "# UtilityPack.create_path(ModelKeys['model_dir'])\n", "# # secondary folders\n", "# ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", "# ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", "# # for UNet\n", "# ModelKeys['dim']=768 # dim for UNet\n", "# # for debug\n", "# ModelKeys['dim']=256 # dim for UNet: GPU stat: 256:16093MiB;\n", "# #\n", "# ModelKeys['text_embed_dim']=512-128\n", "# ModelKeys['embed_dim_position']=128\n", "# # ! 
this is for UNet, not for Imagen\n", "# ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", "# ModelKeys['cond_dim']=512\n", "# # ModelKeys['embed_dim_position']=128\n", "# # ModelKeys['text_embed_dim']=ModelKeys['cond_dim']-ModelKeys['embed_dim_position']\n", "# # !!! Need to check this one = 1 or not\n", "# ModelKeys['cond_images_channels']=0 # 1\n", "# # !!! Need to check this one: should it be 8 or 64?\n", "# ModelKeys['max_text_len']= DataKeys['max_text_len'] # DataKeys['max_AA_seq_len'] # 8 # this is about text condi\n", "\n", "# # for Imagen\n", "# ModelKeys['pred_dim']=1 # for sequence, =1\n", "# ModelKeys['diff_timesteps']=(96) # (96,)\n", "# ModelKeys['loss_type']=0 # MSE\n", "# ModelKeys['elucidated']=True #\n", "# ModelKeys['padding_idx']=0\n", "# ModelKeys['max_length']=DataKeys['max_AA_seq_len']\n", "# ModelKeys['device']=device\n", "# #\n", "# # baskets for Model building\n", "# ModelKeys['UNet']={}\n", "# ModelKeys['Imagen']={}\n", "# # ++++++++++++++++++++++++++++++++++++++\n", "# # prepare the Unet Key\n", "# write_PK_UNet=dict()\n", "# # used ones\n", "# write_PK_UNet['dim']=ModelKeys['dim']\n", "# write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim']\n", "# write_PK_UNet['num_resnet_blocks']=1\n", "# write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", "# # write_PK_UNet['num_image_tokens']=None # using the default value\n", "# # write_PK_UNet['num_time_tokens']=None # using the default\n", "# # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", "# # write_PK_UNet['out_dim']=None\n", "# write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", "# write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_UNet['channels']=ModelKeys['pred_dim']\n", "# write_PK_UNet['channels_out']=ModelKeys['pred_dim']\n", "\n", "# write_PK_UNet['attn_dim_head']=64\n", "# write_PK_UNet['attn_heads']=8\n", "# write_PK_UNet['ff_mult']=2.\n", "# 
write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", "\n", "# write_PK_UNet['layer_attns']=(False, True, True, False)\n", "# write_PK_UNet['layer_attns_depth']=1\n", "# write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as described in Appendix D.3.1\n", "# write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", "# write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", "# write_PK_UNet['use_linear_attn']=False\n", "# write_PK_UNet['use_linear_cross_attn']=False\n", "\n", "# write_PK_UNet['cond_on_text'] = True\n", "# # !!! Need to check this: 63, Imagen used a different one\n", "# write_PK_UNet['max_text_len'] = DataKeys['max_text_len'] # DataKeys['max_AA_seq_len'] # ModelKeys['max_text_len']\n", "# # write_PK_UNet['init_dim'] = None\n", "# write_PK_UNet['resnet_groups'] = 8\n", "# write_PK_UNet['init_conv_kernel_size'] =7 # kernel size of initial conv, if not using cross embed\n", "# write_PK_UNet['init_cross_embed'] = False #TODO - fix ouput size calcs for conv1d\n", "# write_PK_UNet['init_cross_embed_kernel_sizes'] = (3, 7, 15)\n", "# write_PK_UNet['cross_embed_downsample'] = False\n", "# write_PK_UNet['cross_embed_downsample_kernel_sizes'] = (2, 4)\n", "\n", "# write_PK_UNet['attn_pool_text'] = True\n", "# write_PK_UNet['attn_pool_num_latents'] = 32 #32, #perceiver model latents\n", "# write_PK_UNet['dropout'] = 0.\n", "# write_PK_UNet['memory_efficient'] = False\n", "# write_PK_UNet['init_conv_to_final_conv_residual'] = False\n", "\n", "# write_PK_UNet['use_global_context_attn'] = True\n", "# write_PK_UNet['scale_skip_connection'] = True\n", "# write_PK_UNet['final_resnet_block'] = True\n", "# write_PK_UNet['final_conv_kernel_size'] = 3\n", "\n", "# write_PK_UNet['cosine_sim_attn'] = 
True\n", "# write_PK_UNet['self_cond'] = False\n", "# write_PK_UNet['combine_upsample_fmaps'] = True # combine feature maps from all upsample blocks, used in unet squared successfully\n", "# write_PK_UNet['pixel_shuffle_upsample'] = False # may address checkboard artifacts\n", "# # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", "# #\n", "# ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", "# # ++++++++++++++++++++++++++++++++++++++\n", "# # beyond UNet, for Whole model, all keys\n", "# # \"None\" means defult value on function definition\n", "# write_PK_Imagen=dict()\n", "\n", "# # used ones\n", "# write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", "# write_PK_Imagen['dim']=ModelKeys['dim']\n", "# write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", "# write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", "# write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", "# write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", "# write_PK_Imagen['cond_dim']=ModelKeys['cond_dim'] # use default\n", "# # can use default, 512; or like the below (check the code)\n", "# write_PK_Imagen['text_embed_dim']=ModelKeys['text_embed_dim']-ModelKeys['embed_dim_position']\n", "# # write_PK_Imagen['input_tokens']=None\n", "# # write_PK_Imagen['sequence_embed']=None\n", "# write_PK_Imagen['embed_dim_position']=ModelKeys['embed_dim_position']\n", "# write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", "# write_PK_Imagen['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_Imagen['max_length']=ModelKeys['max_length']\n", "# write_PK_Imagen['device']=ModelKeys['device']\n", "\n", "# # extend it to a full key\n", "# ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", "\n", " elif CKeys['Problem_ID']==5:\n", " pass\n", "\n", "# # this is 1st run....\n", "# # +++++++++++++++++++++++++++++++++++++\n", "# # SecStr as input seq\n", "# 
ModelKeys={}\n", "# # storage\n", "# ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", "# # create the folder\n", "# print(\"Creating the model dir...\")\n", "# UtilityPack.create_path(ModelKeys['model_dir'])\n", "# # secondary folders\n", "# ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", "# ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", "# # for UNet\n", "# ModelKeys['dim']=256 # dim for UNet\n", "# ModelKeys['text_embed_dim'] = 512\n", "# ModelKeys['embed_dim_position']=32\n", "# ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", "# ModelKeys['cond_dim'] = 512\n", "# ModelKeys['cond_images_channels']=DataKeys['image_channels'] # 1280 # for embedding dim # 1\n", "# ModelKeys['max_text_len']=DataKeys['max_AA_seq_len'] # this is about text condi\n", "# # + for embedding\n", "# ModelKeys['image_channels']=DataKeys['image_channels'] # 1280\n", "# # for Imagen\n", "# ModelKeys['pred_dim']= ModelKeys['image_channels'] # 1 # for sequence, =1\n", "# ModelKeys['diff_timesteps']=(96,)\n", "# ModelKeys['loss_type']=0 # MSE\n", "# ModelKeys['elucidated']=True #\n", "# ModelKeys['padding_idx']=0\n", "# ModelKeys['max_length']=DataKeys['max_AA_seq_len'] # max seq len\n", "# ModelKeys['device']=device\n", "# #\n", "# # buckets for Model building\n", "# ModelKeys['UNet']={}\n", "# ModelKeys['Imagen']={}\n", "# # ++++++++++++++++++++++++++++++++++++++++++++++++++\n", "# write_PK_UNet=dict()\n", "# write_PK_UNet['dim']=ModelKeys['dim'] # 256\n", "# write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim'] # 512+32\n", "# write_PK_UNet['num_resnet_blocks']=1\n", "# write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", "# # write_PK_UNet['num_image_tokens']=None # using the default value\n", "# # 
write_PK_UNet['num_time_tokens']=None # using the default\n", "# # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", "# # write_PK_UNet['out_dim']=None\n", "# write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", "# write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_UNet['channels']=ModelKeys['image_channels'] # 1\n", "# write_PK_UNet['channels_out']=ModelKeys['image_channels'] # 1\n", "\n", "# write_PK_UNet['attn_dim_head']=64\n", "# write_PK_UNet['attn_heads']=8\n", "# write_PK_UNet['ff_mult']=2.\n", "# write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", "# write_PK_UNet['layer_attns']=(False, True, True, False)\n", "# write_PK_UNet['layer_attns_depth']=1\n", "# write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as described in Appendix D.3.1\n", "# write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", "# write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", "# write_PK_UNet['use_linear_attn']=False\n", "# write_PK_UNet['use_linear_cross_attn']=False\n", "\n", "# write_PK_UNet['cond_on_text']=True\n", "# write_PK_UNet['max_text_len']=ModelKeys['max_text_len'] # need to check this one\n", "# # ModelKeys['UNet']['init_dim']=None\n", "# write_PK_UNet['resnet_groups']=8\n", "# write_PK_UNet['init_conv_kernel_size']=7 # kernel size of initial conv, if not using cross embed\n", "# write_PK_UNet['init_cross_embed']=False #TODO - fix ouput size calcs for conv1d\n", "# write_PK_UNet['init_cross_embed_kernel_sizes']=(3, 7, 15)\n", "# write_PK_UNet['cross_embed_downsample']=False\n", "# write_PK_UNet['cross_embed_downsample_kernel_sizes']=(2, 4)\n", "\n", "# write_PK_UNet['attn_pool_text']=True\n", "# write_PK_UNet['attn_pool_num_latents']=32 #perceiver model 
latents\n", "# write_PK_UNet['dropout']=0.\n", "# write_PK_UNet['memory_efficient']=False\n", "# write_PK_UNet['init_conv_to_final_conv_residual']=False\n", "\n", "# write_PK_UNet['use_global_context_attn']=True\n", "# write_PK_UNet['scale_skip_connection']=True\n", "# write_PK_UNet['final_resnet_block']=True\n", "# write_PK_UNet['final_conv_kernel_size']=3\n", "\n", "# write_PK_UNet['cosine_sim_attn']=True\n", "# write_PK_UNet['self_cond']=False\n", "# write_PK_UNet['combine_upsample_fmaps']=True # combine feature maps from all upsample blocks, used in unet squared successfully\n", "# write_PK_UNet['pixel_shuffle_upsample']=False # may address checkboard artifacts\n", "# # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", "# ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", "# # +++++++++++++++++++++++++++++++++++++++++++++++++++++\n", "# # beyond UNet, for Whole model, all keys\n", "# # \"None\" means defult value on function definition\n", "# write_PK_Imagen=dict()\n", "\n", "# write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", "# write_PK_Imagen['dim']=ModelKeys['dim']\n", "# write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", "# write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", "# write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", "# write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", "# # write_PK_Imagen['cond_dim']=None # use default\n", "# # write_PK_Imagen['text_embed_dim']=None\n", "# # write_PK_Imagen['input_tokens']=None\n", "# # write_PK_Imagen['sequence_embed']=None\n", "# # write_PK_Imagen['embed_dim_position']=None\n", "# write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", "# write_PK_Imagen['cond_images_channels']=ModelKeys['pred_dim']\n", "# write_PK_Imagen['max_length']=ModelKeys['max_length']\n", "# write_PK_Imagen['device']=ModelKeys['device']\n", "\n", "# # extend it to a full key\n", "# 
ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", "\n", " elif CKeys['Problem_ID']==6:\n", " # =====================================================\n", " # ForcePath --> AA sequence\n", " # =====================================================\n", " #\n", " # this is 1st run....\n", " # +++++++++++++++++++++++++++++++++++++\n", " # ForcePath as input seq\n", " ModelKeys={}\n", " # storage\n", " ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", " # create the folder\n", " print(\"Creating the model dir...\")\n", " UtilityPack.create_path(ModelKeys['model_dir'])\n", " # secondary folders\n", " ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", " UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", " ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", " UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", " # for UNet\n", " ModelKeys['dim']=256 # dim for UNet\n", " ModelKeys['text_embed_dim'] = 512\n", " ModelKeys['embed_dim_position']=32\n", " ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", " ModelKeys['cond_dim'] = 512\n", " ModelKeys['cond_images_channels']=DataKeys['image_channels'] # 1\n", " ModelKeys['max_text_len']=DataKeys['max_AA_seq_len'] # this is about text conditioning\n", " # for Imagen\n", " ModelKeys['pred_dim']=DataKeys['image_channels'] # 1 # for sequence, =1\n", " ModelKeys['diff_timesteps']=(96,)\n", " ModelKeys['loss_type']=0 # MSE\n", " ModelKeys['elucidated']=True #\n", " ModelKeys['padding_idx']=0\n", " ModelKeys['max_length']=DataKeys['max_AA_seq_len'] # max seq len\n", " ModelKeys['device']=device\n", " #\n", " # buckets for Model building\n", " ModelKeys['UNet']={}\n", " ModelKeys['Imagen']={}\n", " # ++++++++++++++++++++++++++++++++++++++++++++++++++\n", " write_PK_UNet=dict()\n", " write_PK_UNet['dim']=ModelKeys['dim'] # 256\n", " write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim'] # 512+32\n", " 
write_PK_UNet['num_resnet_blocks']=1\n", " write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", " # write_PK_UNet['num_image_tokens']=None # using the default value\n", " # write_PK_UNet['num_time_tokens']=None # using the default\n", " # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", " # write_PK_UNet['out_dim']=None\n", " write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", " write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", " write_PK_UNet['channels']=DataKeys['image_channels'] # 1\n", " write_PK_UNet['channels_out']=DataKeys['image_channels'] # 1\n", "\n", " write_PK_UNet['attn_dim_head']=64\n", " write_PK_UNet['attn_heads']=8\n", " write_PK_UNet['ff_mult']=2.\n", " write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", " write_PK_UNet['layer_attns']=(False, True, True, False)\n", " write_PK_UNet['layer_attns_depth']=1\n", " write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as described in Appendix D.3.1\n", " write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", " write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", " write_PK_UNet['use_linear_attn']=False\n", " write_PK_UNet['use_linear_cross_attn']=False\n", "\n", " write_PK_UNet['cond_on_text']=True\n", " write_PK_UNet['max_text_len']=ModelKeys['max_text_len'] # need to check this one\n", " # ModelKeys['UNet']['init_dim']=None\n", " write_PK_UNet['resnet_groups']=8\n", " write_PK_UNet['init_conv_kernel_size']=7 # kernel size of initial conv, if not using cross embed\n", " write_PK_UNet['init_cross_embed']=False #TODO - fix ouput size calcs for conv1d\n", " write_PK_UNet['init_cross_embed_kernel_sizes']=(3, 7, 15)\n", " 
write_PK_UNet['cross_embed_downsample']=False\n", " write_PK_UNet['cross_embed_downsample_kernel_sizes']=(2, 4)\n", "\n", " write_PK_UNet['attn_pool_text']=True\n", " write_PK_UNet['attn_pool_num_latents']=32 #perceiver model latents\n", " write_PK_UNet['dropout']=0.\n", " write_PK_UNet['memory_efficient']=False\n", " write_PK_UNet['init_conv_to_final_conv_residual']=False\n", "\n", " write_PK_UNet['use_global_context_attn']=True\n", " write_PK_UNet['scale_skip_connection']=True\n", " write_PK_UNet['final_resnet_block']=True\n", " write_PK_UNet['final_conv_kernel_size']=3\n", "\n", " write_PK_UNet['cosine_sim_attn']=True\n", " write_PK_UNet['self_cond']=False\n", " write_PK_UNet['combine_upsample_fmaps']=True # combine feature maps from all upsample blocks, used in unet squared successfully\n", " write_PK_UNet['pixel_shuffle_upsample']=False # may address checkboard artifacts\n", " # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", " ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", " # +++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " # beyond UNet, for Whole model, all keys\n", " # \"None\" means defult value on function definition\n", " write_PK_Imagen=dict()\n", "\n", " write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", " write_PK_Imagen['dim']=ModelKeys['dim']\n", " write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", " write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", " write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", " write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", " # write_PK_Imagen['cond_dim']=None # use default\n", " # write_PK_Imagen['text_embed_dim']=None\n", " # write_PK_Imagen['input_tokens']=None\n", " # write_PK_Imagen['sequence_embed']=None\n", " # write_PK_Imagen['embed_dim_position']=None\n", " write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", " 
write_PK_Imagen['cond_images_channels']=ModelKeys['cond_images_channels']\n", " write_PK_Imagen['max_length']=ModelKeys['max_length']\n", " write_PK_Imagen['device']=ModelKeys['device']\n", "\n", " # extend it to a full key\n", " ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", " # to be defined\n", "\n", " elif CKeys['Problem_ID']==7:\n", " pass\n", "# # ================================================================\n", "# # SecStr text summary --> AA seq pLM embedding\n", "# # ================================================================\n", "# ModelKeys={}\n", "# # storage\n", "# ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", "# # create the folder\n", "# print(\"Creating the model dir...\")\n", "# UtilityPack.create_path(ModelKeys['model_dir'])\n", "# # secondary folders\n", "# ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", "# ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", "# UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", "# # for UNet\n", "# ModelKeys['dim']=768 # dim for UNet\n", "# # for debug\n", "# ModelKeys['dim']=256 # dim for UNet: GPU stat: 256:16093MiB;\n", "# #\n", "# ModelKeys['text_embed_dim']=512-128\n", "# ModelKeys['embed_dim_position']=128\n", "# # ! this is for UNet, not for Imagen\n", "# ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", "# ModelKeys['cond_dim']=512\n", "# # ModelKeys['embed_dim_position']=128\n", "# # ModelKeys['text_embed_dim']=ModelKeys['cond_dim']-ModelKeys['embed_dim_position']\n", "# # !!! Need to check this one = 1 or not\n", "# # $: UNet use this key to decide whether cond_img is provided or not (0)\n", "# ModelKeys['cond_images_channels']= 0 # indicate NO cond_img # 0 # DataKeys['image_channels']\n", "# # !!! 
Need to check this one: should it be 8 or 64?\n", "# ModelKeys['max_text_len']= DataKeys['max_text_len'] # DataKeys['max_AA_seq_len'] # 8 # this is about text condi\n", "\n", "# # for Imagen\n", "# ModelKeys['pred_dim']=DataKeys['image_channels'] # 1 # for sequence, =1\n", "# ModelKeys['diff_timesteps']=(96) # (96,)\n", "# ModelKeys['loss_type']=0 # MSE\n", "# ModelKeys['elucidated']=True #\n", "# ModelKeys['padding_idx']=0\n", "# ModelKeys['max_length']=DataKeys['max_AA_seq_len']\n", "# ModelKeys['device']=device\n", "# #\n", "# # baskets for Model building\n", "# ModelKeys['UNet']={}\n", "# ModelKeys['Imagen']={}\n", "# # ++++++++++++++++++++++++++++++++++++++\n", "# # prepare the Unet Key\n", "# write_PK_UNet=dict()\n", "# # used ones\n", "# write_PK_UNet['dim']=ModelKeys['dim']\n", "# write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim']\n", "# write_PK_UNet['num_resnet_blocks']=1\n", "# write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", "# # write_PK_UNet['num_image_tokens']=None # using the default value\n", "# # write_PK_UNet['num_time_tokens']=None # using the default\n", "# # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", "# # write_PK_UNet['out_dim']=None\n", "# write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", "# write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_UNet['channels']=DataKeys['image_channels'] # ModelKeys['pred_dim']\n", "# write_PK_UNet['channels_out']=DataKeys['image_channels'] # ModelKeys['pred_dim']\n", "\n", "# write_PK_UNet['attn_dim_head']=64\n", "# write_PK_UNet['attn_heads']=8\n", "# write_PK_UNet['ff_mult']=2.\n", "# write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", "\n", "# write_PK_UNet['layer_attns']=(False, True, True, False)\n", "# write_PK_UNet['layer_attns_depth']=1\n", "# write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks 
with the text embeddings, as described in Appendix D.3.1\n", "# write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", "# write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", "# write_PK_UNet['use_linear_attn']=False\n", "# write_PK_UNet['use_linear_cross_attn']=False\n", "\n", "# write_PK_UNet['cond_on_text'] = True\n", "# # !!! Need to check this: 63, Imagen used a different one\n", "# # !!! here try the new one which seems to be correct\n", "# write_PK_UNet['max_text_len'] = ModelKeys['max_text_len'] # DataKeys['max_AA_seq_len'] # ModelKeys['max_text_len']\n", "# # write_PK_UNet['init_dim'] = None\n", "# write_PK_UNet['resnet_groups'] = 8\n", "# write_PK_UNet['init_conv_kernel_size'] =7 # kernel size of initial conv, if not using cross embed\n", "# write_PK_UNet['init_cross_embed'] = False #TODO - fix ouput size calcs for conv1d\n", "# write_PK_UNet['init_cross_embed_kernel_sizes'] = (3, 7, 15)\n", "# write_PK_UNet['cross_embed_downsample'] = False\n", "# write_PK_UNet['cross_embed_downsample_kernel_sizes'] = (2, 4)\n", "\n", "# write_PK_UNet['attn_pool_text'] = True\n", "# write_PK_UNet['attn_pool_num_latents'] = 32 #32, #perceiver model latents\n", "# write_PK_UNet['dropout'] = 0.\n", "# write_PK_UNet['memory_efficient'] = False\n", "# write_PK_UNet['init_conv_to_final_conv_residual'] = False\n", "\n", "# write_PK_UNet['use_global_context_attn'] = True\n", "# write_PK_UNet['scale_skip_connection'] = True\n", "# write_PK_UNet['final_resnet_block'] = True\n", "# write_PK_UNet['final_conv_kernel_size'] = 3\n", "\n", "# write_PK_UNet['cosine_sim_attn'] = True\n", "# write_PK_UNet['self_cond'] = False\n", "# write_PK_UNet['combine_upsample_fmaps'] = True # combine feature maps from all upsample blocks, used in unet squared successfully\n", "# write_PK_UNet['pixel_shuffle_upsample'] = False # may address checkboard 
artifacts\n", "# # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", "# #\n", "# ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", "# # ++++++++++++++++++++++++++++++++++++++\n", "# # beyond UNet, for Whole model, all keys\n", "# # \"None\" means defult value on function definition\n", "# write_PK_Imagen=dict()\n", "\n", "# # used ones\n", "# write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", "# write_PK_Imagen['dim']=ModelKeys['dim']\n", "# write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", "# write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", "# write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", "# write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", "# write_PK_Imagen['cond_dim']=ModelKeys['cond_dim'] # use default\n", "# # can use default, 512; or like the below (check the code)\n", "# write_PK_Imagen['text_embed_dim']=ModelKeys['text_embed_dim']-ModelKeys['embed_dim_position']\n", "# # write_PK_Imagen['input_tokens']=None\n", "# # write_PK_Imagen['sequence_embed']=None\n", "# write_PK_Imagen['embed_dim_position']=ModelKeys['embed_dim_position']\n", "# write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", "# write_PK_Imagen['cond_images_channels']=ModelKeys['cond_images_channels']\n", "# write_PK_Imagen['max_length']=ModelKeys['max_length']\n", "# write_PK_Imagen['device']=ModelKeys['device']\n", "\n", "# # extend it to a full key\n", "# ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", "\n", " elif CKeys['Problem_ID']==8:\n", " # ================================================================\n", " # FmaxEne text summary --> AA seq pLM embedding\n", " # ================================================================\n", " ModelKeys={}\n", " # storage\n", " ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", " # create the folder\n", " print(\"Creating the model dir...\")\n", " 
UtilityPack.create_path(ModelKeys['model_dir'])\n", " # secondary folders\n", " ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", " UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", " ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", " UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", " # for UNet\n", " ModelKeys['dim']=768 # dim for UNet\n", " # for debug\n", " ModelKeys['dim']=256 # dim for UNet: GPU stat: 256:16093MiB;\n", " #\n", " ModelKeys['text_embed_dim']=512-128\n", " ModelKeys['embed_dim_position']=128\n", " # ! this is for UNet, not for Imagen\n", " ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", " ModelKeys['cond_dim']=512\n", " # ModelKeys['embed_dim_position']=128\n", " # ModelKeys['text_embed_dim']=ModelKeys['cond_dim']-ModelKeys['embed_dim_position']\n", " # !!! Need to check this one = 1 or not\n", " # $: UNet use this key to decide whether cond_img is provided or not (0)\n", " ModelKeys['cond_images_channels']=0 # indicate no cond_img is used # 1\n", " # !!! 
Need to check this one: should it be 8 or 64?\n", " ModelKeys['max_text_len']= DataKeys['max_text_len'] # DataKeys['max_AA_seq_len'] # 8 # this is about text condi\n", "\n", " # for Imagen\n", " ModelKeys['pred_dim']=DataKeys['image_channels'] # 1 # for sequence, =1\n", " ModelKeys['diff_timesteps']=(96) # (96,)\n", " ModelKeys['loss_type']=0 # MSE\n", " ModelKeys['elucidated']=True #\n", " ModelKeys['padding_idx']=0\n", " ModelKeys['max_length']=DataKeys['max_AA_seq_len']\n", " ModelKeys['device']=device\n", " #\n", " # baskets for Model building\n", " ModelKeys['UNet']={}\n", " ModelKeys['Imagen']={}\n", " # ++++++++++++++++++++++++++++++++++++++\n", " # prepare the Unet Key\n", " write_PK_UNet=dict()\n", " # used ones\n", " write_PK_UNet['dim']=ModelKeys['dim']\n", " write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim']\n", " write_PK_UNet['num_resnet_blocks']=1\n", " write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", " # write_PK_UNet['num_image_tokens']=None # using the default value\n", " # write_PK_UNet['num_time_tokens']=None # using the default\n", " # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", " # write_PK_UNet['out_dim']=None\n", " write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", " write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", " write_PK_UNet['channels']=DataKeys['image_channels'] # ModelKeys['pred_dim']\n", " write_PK_UNet['channels_out']=DataKeys['image_channels'] # ModelKeys['pred_dim']\n", "\n", " write_PK_UNet['attn_dim_head']=64\n", " write_PK_UNet['attn_heads']=8\n", " write_PK_UNet['ff_mult']=2.\n", " write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", "\n", " write_PK_UNet['layer_attns']=(False, True, True, False)\n", " write_PK_UNet['layer_attns_depth']=1\n", " write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as 
described in Appendix D.3.1\n", " write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", " write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", " write_PK_UNet['use_linear_attn']=False\n", " write_PK_UNet['use_linear_cross_attn']=False\n", "\n", " write_PK_UNet['cond_on_text'] = True\n", " # !!! Need to check this: 63, Imagen used a different one\n", " write_PK_UNet['max_text_len'] = DataKeys['max_text_len'] # DataKeys['max_AA_seq_len'] # ModelKeys['max_text_len']\n", " # write_PK_UNet['init_dim'] = None\n", " write_PK_UNet['resnet_groups'] = 8\n", " write_PK_UNet['init_conv_kernel_size'] =7 # kernel size of initial conv, if not using cross embed\n", " write_PK_UNet['init_cross_embed'] = False #TODO - fix ouput size calcs for conv1d\n", " write_PK_UNet['init_cross_embed_kernel_sizes'] = (3, 7, 15)\n", " write_PK_UNet['cross_embed_downsample'] = False\n", " write_PK_UNet['cross_embed_downsample_kernel_sizes'] = (2, 4)\n", "\n", " write_PK_UNet['attn_pool_text'] = True\n", " write_PK_UNet['attn_pool_num_latents'] = 32 #32, #perceiver model latents\n", " write_PK_UNet['dropout'] = 0.\n", " write_PK_UNet['memory_efficient'] = False\n", " write_PK_UNet['init_conv_to_final_conv_residual'] = False\n", "\n", " write_PK_UNet['use_global_context_attn'] = True\n", " write_PK_UNet['scale_skip_connection'] = True\n", " write_PK_UNet['final_resnet_block'] = True\n", " write_PK_UNet['final_conv_kernel_size'] = 3\n", "\n", " write_PK_UNet['cosine_sim_attn'] = True\n", " write_PK_UNet['self_cond'] = False\n", " write_PK_UNet['combine_upsample_fmaps'] = True # combine feature maps from all upsample blocks, used in unet squared successfully\n", " write_PK_UNet['pixel_shuffle_upsample'] = False # may address checkboard artifacts\n", " # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", " #\n", " 
ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", " # ++++++++++++++++++++++++++++++++++++++\n", " # beyond UNet, for Whole model, all keys\n", " # \"None\" means defult value on function definition\n", " write_PK_Imagen=dict()\n", "\n", " # used ones\n", " write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", " write_PK_Imagen['dim']=ModelKeys['dim']\n", " write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", " write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", " write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", " write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", " write_PK_Imagen['cond_dim']=ModelKeys['cond_dim'] # use default\n", " # can use default, 512; or like the below (check the code)\n", " write_PK_Imagen['text_embed_dim']=ModelKeys['text_embed_dim']-ModelKeys['embed_dim_position']\n", " # write_PK_Imagen['input_tokens']=None\n", " # write_PK_Imagen['sequence_embed']=None\n", " write_PK_Imagen['embed_dim_position']=ModelKeys['embed_dim_position']\n", " write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", " write_PK_Imagen['cond_images_channels']=ModelKeys['cond_images_channels']\n", " write_PK_Imagen['max_length']=ModelKeys['max_length']\n", " write_PK_Imagen['device']=ModelKeys['device']\n", "\n", " # extend it to a full key\n", " ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", "\n", " elif CKeys['Problem_ID']==11:\n", " # =====================================================\n", " # ForcePath --> AA sequence\n", " # =====================================================\n", " #\n", " # this is 1st run....\n", " # +++++++++++++++++++++++++++++++++++++\n", " # ForcPath as input seq\n", " ModelKeys={}\n", " # storage\n", " ModelKeys['model_dir']=PKeys['prefix']+'1_model_SS/'\n", " # create the folder\n", " print(\"Creating the model dir...\")\n", " UtilityPack.create_path(ModelKeys['model_dir'])\n", " # secondary folders\n", " 
ModelKeys['model_dir_sample']=ModelKeys['model_dir']+'0_mid_sample/'\n", " UtilityPack.create_path(ModelKeys['model_dir_sample'])\n", " ModelKeys['model_dir_model']=ModelKeys['model_dir']+'1_store_model/'\n", " UtilityPack.create_path(ModelKeys['model_dir_model'])\n", "\n", " # for UNet\n", " ModelKeys['dim']=256 # dim for UNet\n", " ModelKeys['text_embed_dim'] = 512\n", " ModelKeys['embed_dim_position']=32\n", " ModelKeys['text_embed_dim']=ModelKeys['text_embed_dim']+ModelKeys['embed_dim_position']\n", " ModelKeys['cond_dim'] = 512\n", " ModelKeys['cond_images_channels']=DataKeys['image_channels'] # 1\n", " ModelKeys['max_text_len']=DataKeys['max_AA_seq_len'] # this is about text condi\n", " # for Imagen\n", " ModelKeys['pred_dim']=DataKeys['image_channels'] # 1 # for sequence, =1\n", " ModelKeys['diff_timesteps']=(96,)\n", " ModelKeys['loss_type']=0 # MSE\n", " ModelKeys['elucidated']=True #\n", " ModelKeys['padding_idx']=0\n", " ModelKeys['max_length']=DataKeys['max_AA_seq_len'] # max seq len\n", " ModelKeys['device']=device\n", " #\n", " # buckets for Model building\n", " ModelKeys['UNet']={}\n", " ModelKeys['Imagen']={}\n", " # ++++++++++++++++++++++++++++++++++++++++++++++++++\n", " write_PK_UNet=dict()\n", " write_PK_UNet['dim']=ModelKeys['dim'] # 256\n", " write_PK_UNet['text_embed_dim']=ModelKeys['text_embed_dim'] # 512+32\n", " write_PK_UNet['num_resnet_blocks']=1\n", " write_PK_UNet['cond_dim']=ModelKeys['cond_dim'] #this is where text embeddings are projected to...\n", " # write_PK_UNet['num_image_tokens']=None # using the default value\n", " # write_PK_UNet['num_time_tokens']=None # using the default\n", " # write_PK_UNet['learned_sinu_pos_emb_dim']=None\n", " # write_PK_UNet['out_dim']=None\n", " write_PK_UNet['dim_mults']=(1, 2, 4, 8)\n", "\n", " write_PK_UNet['cond_images_channels']=ModelKeys['cond_images_channels']\n", " write_PK_UNet['channels']=DataKeys['image_channels'] # 1\n", " write_PK_UNet['channels_out']=DataKeys['image_channels'] # 1\n", 
"\n", " write_PK_UNet['attn_dim_head']=64\n", " write_PK_UNet['attn_heads']=8\n", " write_PK_UNet['ff_mult']=2.\n", " write_PK_UNet['lowres_cond']=False # for cascading diffusion - https://cascaded-diffusion.github.io/\n", "\n", " write_PK_UNet['layer_attns']=(False, True, True, False)\n", " write_PK_UNet['layer_attns_depth']=1\n", " write_PK_UNet['layer_attns_add_text_cond']=True # whether to condition the self-attention blocks with the text embeddings, as described in Appendix D.3.1\n", " write_PK_UNet['attend_at_middle']=True # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)\n", " write_PK_UNet['layer_cross_attns']=(False, True, True, False)\n", " write_PK_UNet['use_linear_attn']=False\n", " write_PK_UNet['use_linear_cross_attn']=False\n", "\n", " write_PK_UNet['cond_on_text']=True\n", " write_PK_UNet['max_text_len']=ModelKeys['max_text_len'] # need to check this one\n", " # ModelKeys['UNet']['init_dim']=None\n", " write_PK_UNet['resnet_groups']=8\n", " write_PK_UNet['init_conv_kernel_size']=7 # kernel size of initial conv, if not using cross embed\n", " write_PK_UNet['init_cross_embed']=False #TODO - fix ouput size calcs for conv1d\n", " write_PK_UNet['init_cross_embed_kernel_sizes']=(3, 7, 15)\n", " write_PK_UNet['cross_embed_downsample']=False\n", " write_PK_UNet['cross_embed_downsample_kernel_sizes']=(2, 4)\n", "\n", " write_PK_UNet['attn_pool_text']=True\n", " write_PK_UNet['attn_pool_num_latents']=32 #perceiver model latents\n", " write_PK_UNet['dropout']=0.\n", " write_PK_UNet['memory_efficient']=False\n", " write_PK_UNet['init_conv_to_final_conv_residual']=False\n", "\n", " write_PK_UNet['use_global_context_attn']=True\n", " write_PK_UNet['scale_skip_connection']=True\n", " write_PK_UNet['final_resnet_block']=True\n", " write_PK_UNet['final_conv_kernel_size']=3\n", "\n", " write_PK_UNet['cosine_sim_attn']=True\n", " write_PK_UNet['self_cond']=False\n", " 
write_PK_UNet['combine_upsample_fmaps']=True # combine feature maps from all upsample blocks, used in unet squared successfully\n", " write_PK_UNet['pixel_shuffle_upsample']=False # may address checkboard artifacts\n", " # write_PK_UNet['beginning_and_final_conv_present']=None # use default\n", " ModelKeys['UNet']=UtilityPack.prepare_UNet_keys(write_PK_UNet)\n", " # +++++++++++++++++++++++++++++++++++++++++++++++++++++\n", " # beyond UNet, for Whole model, all keys\n", " # \"None\" means defult value on function definition\n", " write_PK_Imagen=dict()\n", "\n", " write_PK_Imagen['timesteps']=ModelKeys['diff_timesteps']\n", " write_PK_Imagen['dim']=ModelKeys['dim']\n", " write_PK_Imagen['pred_dim']=ModelKeys['pred_dim']\n", " write_PK_Imagen['loss_type']=ModelKeys['loss_type'] # 0 # MSE\n", " write_PK_Imagen['elucidated']=ModelKeys['elucidated'] # True\n", " write_PK_Imagen['padding_idx']=ModelKeys['padding_idx'] # 0 # need to check\n", " # write_PK_Imagen['cond_dim']=None # use default\n", " # write_PK_Imagen['text_embed_dim']=None\n", " # write_PK_Imagen['input_tokens']=None\n", " # write_PK_Imagen['sequence_embed']=None\n", " # write_PK_Imagen['embed_dim_position']=None\n", " write_PK_Imagen['max_text_len']=ModelKeys['max_text_len']\n", " write_PK_Imagen['cond_images_channels']=ModelKeys['cond_images_channels']\n", " write_PK_Imagen['max_length']=ModelKeys['max_length']\n", " write_PK_Imagen['device']=ModelKeys['device']\n", "\n", " # extend it to a full key\n", " ModelKeys['Imagen']=UtilityPack.prepare_ModelB_keys(write_PK_Imagen)\n", " # to be defined\n", "\n", "\n", " # --\n", " print(\"==================================================\")\n", " print(\"store the MODEL key for the next-time usage\")\n", " print(\"==================================================\")\n", " model_pack = {}\n", " model_pack['ModelKeys']=ModelKeys\n", " with open(PKeys['pk_model_pack'], 'wb') as handle:\n", " pickle.dump(model_pack, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", 
"\n", " print(\"Done.\")\n", "\n", "\n", "else: # both for training and testing\n", " #\n", " print(\"==================================================\")\n", " print(\"load in the MODEL key from the previous storage\")\n", " print(\"==================================================\")\n", " #\n", " # this is not the first run\n", " print('This is not the first run')\n", " print('Load back in the model packages...')\n", " with open(PKeys['pk_model_pack'], 'rb') as handle:\n", " model_pack = pickle.load(handle)\n", " # deliver the results\n", " ModelKeys=model_pack['ModelKeys']\n", " print('Done.')\n", "\n", "\n", "# print(DataKeys)\n", "# check a few key words\n", "# print (DataKeys['max_text_len'])\n", "# print(DataKeys['max_text_len'])\n", "print(ModelKeys['max_text_len'])\n", "print(DataKeys['image_channels'])\n", "print(ModelKeys['cond_images_channels'])\n", "print(ModelKeys['pred_dim'])\n", "print(json.dumps(ModelKeys['UNet'], indent=4))\n", "# print(json.dumps(ModelKeys['Imagen'], indent=4))\n", "print(ModelKeys['Imagen'])\n", "\n", "# select some key values\n", "print(ModelKeys['UNet']['channels'])\n", "print(ModelKeys['UNet']['channels_out'])\n", "print(ModelKeys['UNet']['cond_images_channels'])\n", "print(ModelKeys['Imagen']['pred_dim'])\n", "print(ModelKeys['Imagen']['cond_images_channels'])\n", "# how large the model will be\n", "print()\n", "print(ModelKeys['UNet']['dim'])\n", "print(ModelKeys['Imagen']['dim'])\n", "# =====================================\n", "# setup the model\n", "# =====================================\n", "if CKeys['Problem_ID']==1 or CKeys['Problem_ID']==2:\n", " pass\n", "# # =================================================\n", "# # Resideual level tasks: Model B\n", "# # =================================================\n", "\n", "# # working code: assemble the model archi\n", "# print('--------------------------------------------')\n", "# print('On OneD_Unet...')\n", "# 
print('--------------------------------------------')\n", "# working_unet = ModelPack.OneD_Unet(\n", "# CKeys=CKeys,\n", "# PKeys=ModelKeys['UNet'],\n", "# ).to(device)\n", "\n", "# print(\"Working unet part model: \")\n", "# UtilityPack.params ( working_unet)\n", "\n", "# print('--------------------------------------------')\n", "# print('On whole model...')\n", "# print('--------------------------------------------')\n", "# # on the model part\n", "# working_model = ModelPack.ProteinDesigner_B(\n", "# working_unet,\n", "# CKeys=CKeys,\n", "# PKeys=ModelKeys['Imagen'],\n", "# ). to(device)\n", "\n", "# # measure\n", "# print (\"Total working model: \")\n", "# UtilityPack.params ( working_model)\n", "# print (\"Recasted unet inside the tot one only: \")\n", "# UtilityPack.params ( working_model.imagen.unets[0])\n", "\n", "# print('--------------------------------------------')\n", "# print('On trainer...')\n", "# print('--------------------------------------------')\n", "# working_trainer = TrainerPack.ImagenTrainer(\n", "# working_model,\n", "# CKeys=CKeys\n", "# )\n", "\n", "elif CKeys['Problem_ID']==3 or CKeys['Problem_ID']==4:\n", " pass\n", "# # =================================================\n", "# # Sequence level tasks: Model A\n", "# # =================================================\n", "\n", "# # working code: assemble the model archi\n", "# print('--------------------------------------------')\n", "# print('On OneD_Unet...')\n", "# print('--------------------------------------------')\n", "# working_unet = ModelPack.OneD_Unet(\n", "# CKeys=CKeys,\n", "# PKeys=ModelKeys['UNet'],\n", "# ).to(device)\n", "\n", "# print(\"Working unet part model: \")\n", "# UtilityPack.params ( working_unet)\n", "\n", "# print('--------------------------------------------')\n", "# print('On whole model...')\n", "# print('--------------------------------------------')\n", "# # on the model part\n", "# working_model = ModelPack.ProteinDesigner_A_II(\n", "# 
working_unet,\n",
    "# CKeys=CKeys,\n",
    "# PKeys=ModelKeys['Imagen'],\n",
    "# ). to(device)\n",
    "\n",
    "# # measure\n",
    "# print (\"Total working model: \")\n",
    "# UtilityPack.params ( working_model)\n",
    "# print (\"Recasted unet inside the tot one only: \")\n",
    "# UtilityPack.params ( working_model.imagen.unets[0])\n",
    "\n",
    "# print('--------------------------------------------')\n",
    "# print('On trainer...')\n",
    "# print('--------------------------------------------')\n",
    "# working_trainer = TrainerPack.ImagenTrainer(\n",
    "# working_model,\n",
    "# CKeys=CKeys\n",
    "# )\n",
    "\n",
    "elif CKeys['Problem_ID'] in (5, 6, 7, 8, 11):\n",
    "    # =================================================\n",
    "    # Residue level tasks (Problem_ID 5, 6, 11): Model B\n",
    "    # Sequence level tasks (Problem_ID 7, 8): Model A\n",
    "    # =================================================\n",
    "    # Both models are assembled by exactly the same steps; only the\n",
    "    # wrapper class differs, so pick the class first and build once.\n",
    "    if CKeys['Problem_ID'] in (7, 8):\n",
    "        designer_class = ModelPack.ProteinDesigner_A_II\n",
    "    else:\n",
    "        designer_class = ModelPack.ProteinDesigner_B\n",
    "\n",
    "    # working code: assemble the model archi\n",
    "    print('--------------------------------------------')\n",
    "    print('On OneD_Unet...')\n",
    "    print('--------------------------------------------')\n",
    "    working_unet = ModelPack.OneD_Unet(\n",
    "        CKeys=CKeys,\n",
    "        PKeys=ModelKeys['UNet'],\n",
    "    ).to(device)\n",
    "\n",
    "    print(\"Working unet part model: \")\n",
    "    UtilityPack.params(working_unet)\n",
    "\n",
    "    print('--------------------------------------------')\n",
    "    print('On whole model...')\n",
    "    print('--------------------------------------------')\n",
    "    # on the model part\n",
    "    working_model = designer_class(\n",
    "        working_unet,\n",
    "        CKeys=CKeys,\n",
    "        PKeys=ModelKeys['Imagen'],\n",
    "    ).to(device)\n",
    "\n",
    "    # measure\n",
    "    print(\"Total working model: \")\n",
    "    UtilityPack.params(working_model)\n",
    "    print(\"Recasted unet inside the tot one only: \")\n",
    "    UtilityPack.params(working_model.imagen.unets[0])\n",
    "\n",
    "    print('--------------------------------------------')\n",
    "    print('On trainer...')\n",
    "    print('--------------------------------------------')\n",
    "    working_trainer = TrainerPack.ImagenTrainer(\n",
    "        working_model,\n",
    "        CKeys=CKeys\n",
    "    )\n",
    "\n",
    "\n",
    "\n",
    "# The branches for Problem_ID 1-4 only `pass` and any other ID matches no\n",
    "# branch, so nothing is assembled for them. Stop with an explicit message\n",
    "# instead of the bare NameError that `del working_unet` would raise.\n",
    "if 'working_unet' not in globals():\n",
    "    raise RuntimeError(\n",
    "        'No model was assembled for Problem_ID={}: only Problem_ID 5, 6, 7, 8 '\n",
    "        'and 11 are enabled in this notebook.'.format(CKeys['Problem_ID'])\n",
    "    )\n",
    "# drop the standalone unet name and release cached GPU memory\n",
    "del working_unet\n",
    "torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "oP6SZ_omkf8k",
   "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/" }, "id": "oP6SZ_omkf8k", "outputId": "5f3b1fc4-3647-4a11-da46-c930d8fc9391" },
   "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Check tokenizer_X: None\n", "tokenizer_y: None\n" ] } ],
   "source": [
    "#@title ### 1.4. Training part\n",
    "\n",
    "if CKeys['Working_Mode']==1:\n",
    "    print(\"Training mode...\")\n",
    "\n",
    "    TrainKeys={}\n",
    "    if CKeys['Debug']==1:\n",
    "        TrainKeys['epochs']=4\n",
    "        TrainKeys['print_loss_every_this_epochs']=1\n",
    "        TrainKeys['sample_every_this_epochs']=1\n",
    "        TrainKeys['save_model_every_this_epochs']=2\n",
    "    else:\n",
    "        TrainKeys['epochs']=CKeys['epochs'] # 200\n",
    "        TrainKeys['print_loss_every_this_epochs']=CKeys['print_loss_every_this_epochs'] # 5\n",
    "        TrainKeys['sample_every_this_epochs']=CKeys['sample_every_this_epochs'] # 10\n",
    "        TrainKeys['save_model_every_this_epochs']=CKeys['save_model_every_this_epochs'] # 20\n",
    "\n",
    "# one mini batch\n",
    "if CKeys['Debug']==1 and CKeys['Debug_DataPack']==1:\n",
    "    print(\"in a mini-batch:\")\n",
    "    print(\"input.dim: \", this_item[0].shape)\n",
    "    # print(this_item[0][0,:]*torch.FloatTensor(DataKeys['Xnormfac']))\n",
    "    print(\"output.dim: \", this_item[1].shape)\n",
    "    print(DataKeys['Xnormfac'])\n",
    "\n",
    "if CKeys['Working_Mode']==1:\n",
    "    # 
==============================================================\n", " # copy from the above when debug is done:\n", " # ==============================================================\n", " # a few files for restarts:\n", " TRAINING_HIST_FILE = ModelKeys['model_dir']+'Training_Hist.csv'\n", " TRAINING_HIST_FILE_FULL = ModelKeys['model_dir']+'Training_Hist_Full.csv'\n", " TRAINING_BREAK_POINT = ModelKeys['model_dir']+'Training_Info.txt'\n", "\n", " if CKeys['Problem_ID']==1:\n", " pass\n", " # # de novo test\n", " # test_condition_list = [\n", " # ['~~~HHHHHHHHHHHHHHH~~'],\n", " # ['~~EEESSTTS~SEEEEEEEEE~SBS~EEEEEE~~'],\n", " # ]\n", "\n", " elif CKeys['Problem_ID']==2:\n", " pass\n", "\n", " # test_condition_list = [\n", " # np.expand_dims(np.array(DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64))*DataKeys['Xnormfac']*0.33, axis=0),\n", " # np.expand_dims(np.array(DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64))*DataKeys['Xnormfac']*0.66, axis=0),\n", " # ]\n", "\n", " # de novo test\n", " elif CKeys['Problem_ID']==3:\n", " pass\n", "\n", " # test_condition_list = [\n", " # [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],\n", " # [0, 0.7, 0.07, 0.1, 0.01, 0.02, 0.01, 0.11],\n", " # ]\n", "\n", " elif CKeys['Problem_ID']==4:\n", " pass\n", "\n", " # # here we used normalized (nFmax, nToughness)\n", " # # the real values will be (nFmax, nToughness)*Xnormfac\n", " # test_condition_list = [\n", " # [0.5, 0.5],\n", " # [0.2, 0.8],\n", " # ]\n", "\n", " # # #\n", " # # test_condition_list = [\n", " # # np.expand_dims(np.array(this_item[0][10,:]*DataKeys['Xnormfac']), axis=0),\n", " # # np.expand_dims(np.array(this_item[0][20,:]*DataKeys['Xnormfac']), axis=0),\n", " # # ]\n", " # # protein_df['sample_NormPullGap_data'][pick_id_0]\n", " # # DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64)\n", " # # #\n", " # test_condition_list = [\n", " # 
np.expand_dims(np.array(DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64))*DataKeys['Xnormfac']*0.33, axis=0),\n", " # np.expand_dims(np.array(DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64))*DataKeys['Xnormfac']*0.66, axis=0),\n", " # ]\n", " elif CKeys['Problem_ID']==5:\n", " pass\n", "\n", " # # de novo test\n", " # test_condition_list = [\n", " # ['~~~HHHHHHHHHHHHHHH~~'],\n", " # ['~~EEESSTTS~SEEEEEEEEE~SBS~EEEEEE~~'],\n", " # ]\n", "\n", " #\n", " elif CKeys['Problem_ID']==6 or CKeys['Problem_ID']==11:\n", " # for ESM model: 0+content+00\n", " # test_condition_list = [\n", " # np.expand_dims(np.array([0]+DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64-1))*DataKeys['Xnormfac']*0.33, axis=0),\n", " # np.expand_dims(np.array([0]+DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64-1))*DataKeys['Xnormfac']*0.66, axis=0),\n", " # ]\n", " #\n", " # test_condition_list = [\n", " # np.insert(np.array(DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64-1))*DataKeys['Xnormfac']*0.33, 0, 0.),\n", " # np.insert(np.array(DataSetPack.pad_a_np_arr(protein_df['sample_NormPullGap_data'][pick_id_0],0,64-1))*DataKeys['Xnormfac']*0.66, 0, 0.),\n", " # ]\n", " #\n", " test_0 = DataSetPack.pad_a_np_arr(\n", " protein_df['sample_FORCE_data'][pick_id_0],\n", " 0.,\n", " DataKeys['max_AA_seq_len']\n", " )\n", " test_1 = DataSetPack.pad_a_np_arr_esm(\n", " protein_df['sample_FORCE_data'][pick_id_1],\n", " 0.,\n", " DataKeys['max_AA_seq_len']\n", " )\n", " # test_condition_list = [\n", " # test_0*0.33,\n", " # test_0*0.66,\n", " # test_1*0.33,\n", " # test_1*0.66,\n", " # ]\n", " test_condition_list = [\n", " test_0*0.66,\n", " test_1*1.66,\n", " ]\n", "\n", "\n", " #\n", " # de novo test\n", " elif CKeys['Problem_ID']==7:\n", " pass\n", "\n", " # test_condition_list = [\n", " # [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],\n", " # [0, 0.7, 0.07, 
0.1, 0.01, 0.02, 0.01, 0.11],\n", " # ]\n", "\n", " elif CKeys['Problem_ID']==8:\n", "\n", " test_condition_list = [\n", " [0.2, 0.8]*DataKeys['Xnormfac'],\n", " [0.8, 0.2]*DataKeys['Xnormfac'],\n", " ]\n", "\n", "\n", " else:\n", " print(\"No de novo condition is found for the problem...\")\n", "\n", "\n", "\n", "print('Check tokenizer_X:', tokenizer_X)\n", "print('tokenizer_y:', tokenizer_y)\n", "# skip training part" ] }, { "cell_type": "code", "execution_count": 12, "id": "k65qjerclFxn", "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/" }, "id": "k65qjerclFxn", "outputId": "eed8ac6c-fa75-4784-cde6-f3bc7866aeb7" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "This is testing model...\n", "Creating the given path...\n", "Done.\n", "/content/working_results/\n", "Load back the saved model...\n", "checkpoint loaded from /content/working_results/1_model_SS/trainer_save-model_pLDM.pt\n", "11\n", "esm2_t30_150M_UR50D\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "Downloading: \"https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t30_150M_UR50D.pt\" to /root/.cache/torch/hub/checkpoints/esm2_t30_150M_UR50D.pt\n", "Downloading: \"https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t30_150M_UR50D-contact-regression.pt\" to /root/.cache/torch/hub/checkpoints/esm2_t30_150M_UR50D-contact-regression.pt\n" ] } ], "source": [ "#@title ### 1.5. 
Testing loop\n",
    "\n",
    "# Testing-mode setup: result folder, trained checkpoint and, for the\n",
    "# pLM-based problems, the pretrained protein language model.\n",
    "if CKeys['Working_Mode']==2:\n",
    "    print(\"This is testing model...\")\n",
    "    # Sampling switches plus the folder that collects the test results\n",
    "    # (earlier folder choices were '2_test/' and '3_test_common_denovo/').\n",
    "    TestKeys={\n",
    "        'Sample_On_TestSet': False, # True # True # False\n",
    "        'Sample_On_DeNovo': True,\n",
    "        # add for colab\n",
    "        'test_dir': PKeys['prefix']+'4_test_individual_cases/',\n",
    "    }\n",
    "    UtilityPack.create_path(TestKeys['test_dir'])\n",
    "print(PKeys['prefix'])\n",
    "\n",
    "\n",
    "# Looking back at the training history is skipped for colab inferring.\n",
    "# The disabled code read Training_Hist_Full.csv to report and plot the\n",
    "# epoch with the lowest norm_loss, and read Training_Info.txt to recover\n",
    "# last_epoch and last_step (a fixed 4000 / 92000 was also tried).\n",
    "# NOTE(review): last_epoch and last_step are therefore not assigned in\n",
    "# this cell; presumably they are set elsewhere before sampling -- confirm.\n",
    "\n",
    "\n",
    "if CKeys['Working_Mode']==2:\n",
    "    print(\"Load back the saved model...\")\n",
    "    # A fixed checkpoint file under this_working_path is restored; the\n",
    "    # per-epoch filename (trainer_save-model-epoch_{last_epoch}.pt) is disabled.\n",
    "    model_last_fname=f\"{this_working_path}1_model_SS/trainer_save-model_pLDM.pt\"\n",
    "    working_trainer.load(model_last_fname)\n",
    "    # TBA: load back the Best model AVAILABLE\n",
    "\n",
    "print(CKeys['Problem_ID'])\n",
    "print(DataKeys['ESM-2_Model'])\n",
    "# +++++++++++++++++++++++++++++++++++++++++++++\n",
    "# For pLM models (problems 5, 6 and 11) the pretrained pLM is added.\n",
    "# The inline per-model loader that used to sit here (handling 'None',\n",
    "# esm2_t33_650M_UR50D, esm2_t36_3B_UR50D, esm2_t30_150M_UR50D and\n",
    "# esm2_t12_35M_UR50D) is replaced by UtilityPack.load_in_pLM.\n",
    "# +++++++++++++++++++++++++++++++++++++++++++++\n",
    "if CKeys['Working_Mode']==2 and CKeys['Problem_ID'] in (5, 6, 11):\n",
    "    pLM_Model_Name=DataKeys['ESM-2_Model']\n",
    "    # only the first two of the four returned values are kept\n",
    "    pLM_Model, esm_alphabet, _, _ = UtilityPack.load_in_pLM(\n",
    "        pLM_Model_Name,\n",
    "        device\n",
    "    )\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "LbFnbaCB2A7H", "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "f6ec7f703621416fb631812d2f8def00", "68b23bcc31a8453890de44568dfeb1ee", "ef0f7e87f6334939bea151726af03b83", "5d5f291eb6644e6d95d07ba755b56870", "a37c10d569254e958453c189f15cdf9c", "e37c525959bc4dc0ad96b94d83153319", "2ab06057ee1e4c11a487f0d1b289373c", "595600bfe70f4c0bb5fb200217116674", "e2893aae1bb7495e96c770bcdf3dfa4d", "b7dfef148b8645ea9b2c70103a81b6d0", "d908b047df4f4ea59d37355d673319bc", "f833b0f88df14acf8432b8a7098d029f", "bf0a9c22ce974f2eb796c2275e7dec12", "1521cf912d2f4207866034891baad8d9", "51c23b8303c9450f8cfe8c8015ccfa9d", "9901ee5e2d554071a43888b93ddf0073", "a1cfeea9cbab45eca372c506825e25f4", "af16e614a3104abfa5179e6b3e066397", "0f9a7631770a4c64845d95effc1c38ba", "b164b59990084bed94eca230208166ca", "7b5706c2f1a84fabbb828a1bacb36b3d", "d04ff74e247443bc8d8127e3f2161c0c" ] }, "id": "LbFnbaCB2A7H", "outputId": "557258f6-671a-4a97-ccf4-3962a1e14b6a" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Run sampling on De Novo inputs set...\n", "Loading back a common de novo tests: \n", "Get de novo tests #: 8\n", "On the inputs:\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Now, make design based on the inputs...\n", "\n", "\n", "\n", "Producing 2 samples...from image conditingig x_data ...\n", "Input contents:\n", "cond_img condition: x_data=\n", " [array([ 0. , 0. , 0. , 0. ,\n", " 0. , 0. , 0. , 0. ,\n", " 18.28863362, 80.0692019 , 138.13979123, 139.44611167,\n", " 129.23179473, 124.99945748, 147.03594515, 154.26267409,\n", " 85.748749 , 94.74763505, 127.92603341, 123.13805332,\n", " 99.12313939, 146.59346182, 165.770035 , 193.86780985,\n", " 214.00913136, 230.90415894, 285.97514621, 220.28478982,\n", " 165.21657777, 140.97041526, 160.604996 , 182.73803652,\n", " 159.35024788, 149.0699255 , 150.54626136, 148.51384136,\n", " 141.537062 , 178.25737158, 183.89341758, 187.809765 ,\n", " 179.40302 , 167.89063924, 145.78564655, 130.70708892,\n", " 156.36648317, 138.20652636, 134.73462406, 99.97278411,\n", " 119.99524355, 175.66467083, 201.65559424, 212.72502818,\n", " 187.39930591, 153.31598397, 134.58059364, 195.43962833,\n", " 201.50041515, 198.09182045, 181.17193439, 175.74727894,\n", " 156.24546282, 153.27967029, 149.26799638, 144.94954964,\n", " 138.48748241, 154.92514391, 179.62173 , 185.41952212,\n", " 194.21023345, 183.58478745, 157.48736061, 156.53251552,\n", " 171.67232909, 158.94420333, 176.64668212, 154.79523564,\n", " 167.48380455, 189.05203 , 173.74290091, 186.28143273,\n", " 214.02058894, 215.24586955, 219.26901409, 213.58790242,\n", " 200.20914818, 223.59095136, 212.30616773, 233.90132136,\n", " 228.09856667, 222.87644909, 256.01106682, 250.59852318,\n", " 243.01985182, 292.44150727, 284.88984348, 284.51895106,\n", " 295.07615818, 332.62036364, 365.2915753 , 382.064295 ]), array([ 0. , 0. , 0. , 0. ,\n", " 0. , 0. , 0. , 0. 
,\n", " 12.19242241, 53.37946793, 92.09319415, 92.96407444,\n", " 86.15452982, 83.33297166, 98.02396343, 102.84178273,\n", " 57.16583267, 63.16509003, 85.28402227, 82.09203555,\n", " 66.08209293, 97.72897455, 110.51335667, 129.24520657,\n", " 142.67275424, 153.93610596, 190.65009747, 146.85652655,\n", " 110.14438518, 93.98027684, 113.26618295, 130.32051743,\n", " 115.21384005, 109.95814935, 111.50363362, 112.12047879,\n", " 110.6379971 , 139.25401676, 147.61078685, 144.61681044,\n", " 135.93508952, 133.8414882 , 112.20370803, 105.51703962,\n", " 126.78428185, 115.69128733, 114.14739412, 86.50069256,\n", " 97.78519682, 137.55264183, 161.15336059, 169.76223629,\n", " 154.79631037, 136.03060112, 129.56109911, 159.6066976 ,\n", " 159.01942819, 156.89666416, 140.6024269 , 145.42806336,\n", " 136.13430188, 133.84294094, 130.63544912, 126.44938608,\n", " 125.60422528, 139.85916623, 153.52631933, 157.29334918,\n", " 166.96625393, 163.38836786, 146.11630993, 156.10143063,\n", " 169.23123586, 157.99337498, 162.88871136, 142.62287141,\n", " 156.99899748, 176.13464122, 163.78415271, 164.45332481,\n", " 189.85552387, 195.0109843 , 195.37009694, 189.6662509 ,\n", " 189.36861348, 214.00427838, 213.70498551, 233.86747303,\n", " 236.059855 , 238.31386515, 263.50758182, 255.58019414,\n", " 242.30366399, 269.54060652, 264.16919869, 272.51291813,\n", " 283.48890394, 310.40469131, 337.19108152, 362.22738833])]\n", "Text condition: X_cond=\n", " None\n", "Working on cond_scale 1.0\n", "Conditioning target sequence provided via x_data_tokenized ... 
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0244,\n", " 0.1068, 0.1842, 0.1859, 0.1723, 0.1667, 0.1960, 0.2057, 0.1143, 0.1263,\n", " 0.1706, 0.1642, 0.1322, 0.1955, 0.2210, 0.2585, 0.2853, 0.3079, 0.3813,\n", " 0.2937, 0.2203, 0.1880, 0.2141, 0.2437, 0.2125, 0.1988, 0.2007, 0.1980,\n", " 0.1887, 0.2377, 0.2452, 0.2504, 0.2392, 0.2239, 0.1944, 0.1743, 0.2085,\n", " 0.1843, 0.1796, 0.1333, 0.1600, 0.2342, 0.2689, 0.2836, 0.2499, 0.2044,\n", " 0.1794, 0.2606, 0.2687, 0.2641, 0.2416, 0.2343, 0.2083, 0.2044, 0.1990,\n", " 0.1933, 0.1846, 0.2066, 0.2395, 0.2472, 0.2589, 0.2448, 0.2100, 0.2087,\n", " 0.2289, 0.2119, 0.2355, 0.2064, 0.2233, 0.2521, 0.2317, 0.2484, 0.2854,\n", " 0.2870, 0.2924, 0.2848, 0.2669, 0.2981, 0.2831, 0.3119, 0.3041, 0.2972,\n", " 0.3413, 0.3341, 0.3240, 0.3899, 0.3799, 0.3794, 0.3934, 0.4435, 0.4871,\n", " 0.5094, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n", " 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n", " 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n", " 0.0000, 0.0000],\n", " [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0163,\n", " 0.0712, 0.1228, 0.1240, 0.1149, 0.1111, 0.1307, 0.1371, 0.0762, 0.0842,\n", " 0.1137, 0.1095, 0.0881, 0.1303, 0.1474, 0.1723, 0.1902, 0.2052, 0.2542,\n", " 0.1958, 0.1469, 0.1253, 0.1510, 0.1738, 0.1536, 0.1466, 0.1487, 0.1495,\n", " 0.1475, 0.1857, 0.1968, 0.1928, 0.1812, 0.1785, 0.1496, 0.1407, 0.1690,\n", " 0.1543, 0.1522, 0.1153, 0.1304, 0.1834, 0.2149, 0.2263, 0.2064, 0.1814,\n", " 0.1727, 0.2128, 0.2120, 0.2092, 0.1875, 0.1939, 0.1815, 0.1785, 0.1742,\n", " 0.1686, 0.1675, 0.1865, 0.2047, 0.2097, 0.2226, 0.2179, 0.1948, 0.2081,\n", " 0.2256, 0.2107, 0.2172, 0.1902, 0.2093, 0.2348, 0.2184, 0.2193, 0.2531,\n", " 0.2600, 0.2605, 0.2529, 0.2525, 0.2853, 0.2849, 0.3118, 0.3147, 0.3178,\n", " 0.3513, 0.3408, 0.3231, 0.3594, 0.3522, 0.3634, 0.3780, 0.4139, 0.4496,\n", " 0.4830, 0.0000, 
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n", " 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n", " 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n", " 0.0000, 0.0000]]) torch.Size([2, 128])\n", "x_data.dim provided from x_data_tokenized: torch.Size([2, 33, 128])\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "0it [00:00, ?it/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "f6ec7f703621416fb631812d2f8def00" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "sampling time step: 0%| | 0/96 [00:00" ], "image/png": "\n" }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "tensor([17, 20, 14, 14, 4, 5, 15, 7, 12, 11, 18, 10, 5, 17, 11, 6, 14, 18,\n", " 15, 4, 14, 9, 14, 9, 9, 12, 20, 10, 16, 10, 6, 8, 5, 6, 6, 14,\n", " 14, 21, 19, 6, 14, 15, 7, 17, 15, 19, 11, 14, 5, 6, 7, 15, 11, 7,\n", " 9, 15, 16, 22, 14, 9, 6, 4, 4, 15, 13, 9, 9, 15, 14, 9, 7, 7,\n", " 14, 11, 11, 10, 8, 5, 20, 15, 18, 19, 8, 12, 7, 5, 5, 14, 15, 8,\n", " 17, 6, 4, 7, 13, 13, 12, 11, 15, 16, 17, 9, 8, 15, 9, 11, 20, 13,\n", " 17, 11, 13, 17, 11, 11, 11, 17, 11, 13, 13, 17, 13, 13, 11, 17, 13, 11,\n", " 11, 10], device='cuda:0')\n", "tensor([ 5, 20, 13, 13, 14, 11, 15, 4, 5, 5, 4, 17, 19, 4, 11, 10, 5, 18,\n", " 8, 4, 7, 8, 5, 5, 13, 4, 4, 7, 18, 10, 9, 5, 10, 4, 6, 18,\n", " 14, 14, 10, 6, 10, 10, 5, 6, 12, 19, 19, 6, 5, 17, 13, 7, 11, 11,\n", " 23, 16, 18, 23, 6, 7, 6, 6, 18, 9, 5, 9, 9, 6, 5, 13, 14, 14,\n", " 4, 11, 22, 22, 15, 22, 20, 14, 8, 14, 12, 7, 7, 4, 5, 8, 15, 5,\n", " 13, 13, 12, 7, 14, 9, 4, 15, 4, 4, 11, 9, 9, 8, 15, 14, 11, 11,\n", " 11, 17, 13, 17, 11, 11, 17, 11, 17, 13, 11, 11, 17, 17, 17, 11, 11, 11,\n", " 11, 11], device='cuda:0')\n", "For None or [ 0. 0. 0. 0. 0.\n", " 0. 0. 0. 
18.28863362 80.0692019\n", " 138.13979123 139.44611167 129.23179473 124.99945748 147.03594515\n", " 154.26267409 85.748749 94.74763505 127.92603341 123.13805332\n", " 99.12313939 146.59346182 165.770035 193.86780985 214.00913136\n", " 230.90415894 285.97514621 220.28478982 165.21657777 140.97041526\n", " 160.604996 182.73803652 159.35024788 149.0699255 150.54626136\n", " 148.51384136 141.537062 178.25737158 183.89341758 187.809765\n", " 179.40302 167.89063924 145.78564655 130.70708892 156.36648317\n", " 138.20652636 134.73462406 99.97278411 119.99524355 175.66467083\n", " 201.65559424 212.72502818 187.39930591 153.31598397 134.58059364\n", " 195.43962833 201.50041515 198.09182045 181.17193439 175.74727894\n", " 156.24546282 153.27967029 149.26799638 144.94954964 138.48748241\n", " 154.92514391 179.62173 185.41952212 194.21023345 183.58478745\n", " 157.48736061 156.53251552 171.67232909 158.94420333 176.64668212\n", " 154.79523564 167.48380455 189.05203 173.74290091 186.28143273\n", " 214.02058894 215.24586955 219.26901409 213.58790242 200.20914818\n", " 223.59095136 212.30616773 233.90132136 228.09856667 222.87644909\n", " 256.01106682 250.59852318 243.01985182 292.44150727 284.88984348\n", " 284.51895106 295.07615818 332.62036364 365.2915753 382.064295 ], predicted sequence: MPPLAKVITFRANTGPFKLPEPEEIMRQRGSAGGPPHYGPKVNKYTPAGVKTVEKQWPEGLLKDEEKPEVVPTTRSAMKFYSIVAAPKSNGLVDDITKQ\n", "================================================\n", "foldproteins: True\n", "Writing FASTA file: /content/working_results/4_test_individual_cases/fasta_in_100.fasta\n", "Now run OmegaFold.... 
on device=cuda:0\n", "INFO:root:Loading weights from /root/.cache/omegafold_ckpt/model.pt\n", "INFO:root:Constructing OmegaFold\n", "INFO:root:Reading /content/working_results/4_test_individual_cases/fasta_in_100.fasta\n", "INFO:root:Predicting 1th chain in /content/working_results/4_test_individual_cases/fasta_in_100.fasta\n", "INFO:root:99 residues in this chain.\n", "INFO:root:Finished prediction in 8.89 seconds.\n", "INFO:root:Saving prediction to /content/working_results/4_test_individual_cases/temp_100.pdb\n", "INFO:root:Saved\n", "INFO:root:Done!\n", "\n", "Done OmegaFold\n", "Resulting PDB file...: /content/working_results/4_test_individual_cases/temp_100.pdb\n", "Properly named PDB file produced: /content/working_results/4_test_individual_cases/DN_0_CondS_No_0_Val_1.0_epo_100_step_100.pdb\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} }, { "output_type": "display_data", "data": { "application/3dmoljs_load.v0": "
\n

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol

\n
\n", "text/html": [ "
\n", "

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n", " jupyter labextension install jupyterlab_3dmol

\n", "
\n", "" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "For None or [ 0. 0. 0. 0. 0.\n", " 0. 0. 0. 12.19242241 53.37946793\n", " 92.09319415 92.96407444 86.15452982 83.33297166 98.02396343\n", " 102.84178273 57.16583267 63.16509003 85.28402227 82.09203555\n", " 66.08209293 97.72897455 110.51335667 129.24520657 142.67275424\n", " 153.93610596 190.65009747 146.85652655 110.14438518 93.98027684\n", " 113.26618295 130.32051743 115.21384005 109.95814935 111.50363362\n", " 112.12047879 110.6379971 139.25401676 147.61078685 144.61681044\n", " 135.93508952 133.8414882 112.20370803 105.51703962 126.78428185\n", " 115.69128733 114.14739412 86.50069256 97.78519682 137.55264183\n", " 161.15336059 169.76223629 154.79631037 136.03060112 129.56109911\n", " 159.6066976 159.01942819 156.89666416 140.6024269 145.42806336\n", " 136.13430188 133.84294094 130.63544912 126.44938608 125.60422528\n", " 139.85916623 153.52631933 157.29334918 166.96625393 163.38836786\n", " 146.11630993 156.10143063 169.23123586 157.99337498 162.88871136\n", " 142.62287141 156.99899748 176.13464122 163.78415271 164.45332481\n", " 189.85552387 195.0109843 195.37009694 189.6662509 189.36861348\n", " 214.00427838 213.70498551 233.86747303 236.059855 238.31386515\n", " 263.50758182 255.58019414 242.30366399 269.54060652 264.16919869\n", " 272.51291813 283.48890394 310.40469131 337.19108152 362.22738833], predicted sequence: MDDPTKLAALNYLTRAFSLVSAADLLVFREARLGFPPRGRRAGIYYGANDVTTCQFCGVGGFEAEEGADPPLTWWKWMPSPIVVLASKADDIVPELKLL\n", "================================================\n", "foldproteins: True\n", "Writing FASTA file: /content/working_results/4_test_individual_cases/fasta_in_100.fasta\n", "Now run OmegaFold.... 
on device=cuda:0\n", "INFO:root:Loading weights from /root/.cache/omegafold_ckpt/model.pt\n", "INFO:root:Constructing OmegaFold\n", "INFO:root:Reading /content/working_results/4_test_individual_cases/fasta_in_100.fasta\n", "INFO:root:Predicting 1th chain in /content/working_results/4_test_individual_cases/fasta_in_100.fasta\n", "INFO:root:99 residues in this chain.\n", "INFO:root:Finished prediction in 8.87 seconds.\n", "INFO:root:Saving prediction to /content/working_results/4_test_individual_cases/temp_100.pdb\n", "INFO:root:Saved\n", "INFO:root:Done!\n", "\n", "Done OmegaFold\n", "Resulting PDB file...: /content/working_results/4_test_individual_cases/temp_100.pdb\n", "Properly named PDB file produced: /content/working_results/4_test_individual_cases/DN_1_CondS_No_0_Val_1.0_epo_100_step_100.pdb\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} }, { "output_type": "display_data", "data": { "application/3dmoljs_load.v0": "
\n

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol

\n
\n", "text/html": [ "
\n", "

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n", " jupyter labextension install jupyterlab_3dmol

\n", "
\n", "" ] }, "metadata": {} } ], "source": [ "#@title ### Example:\n", "\n", "#@markdown Infer for given unfolding force vecter\n", "\n", "\n", "Sample_for_Num_inputs = 2 #@param {type:\"slider\", min:1, max:8, step:1}\n", "\n", "\n", "\n", "\n", "#\n", "if CKeys['Working_Mode']==2:\n", " if TestKeys['Sample_On_DeNovo']:\n", " #\n", " print(\"Run sampling on De Novo inputs set...\")\n", " #\n", " # de novo test\n", " if CKeys['Problem_ID']==2 or CKeys['Problem_ID']==6 \\\n", " or CKeys['Problem_ID']==11:\n", " print('Loading back a common de novo tests: ')\n", " import pickle\n", " pk_save_de_novo_list = this_working_path+'0_dataprocess_MD/ForTest_LE_128_From_F1_f5.pt'\n", "\n", " with open(pk_save_de_novo_list, 'rb') as handle:\n", " de_novo_test_condition_list = pickle.load(handle)\n", "\n", " de_novo_test_condition_list = de_novo_test_condition_list[:8]\n", " print(\"Get de novo tests #: \", len(de_novo_test_condition_list))\n", " # de_novo_test_condition_list = [\n", " # # =====================================\n", " # # sacle max:\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*1.0,\n", " # seq_len1=protein_df['seq_len'][pick_id_0],\n", " # )['y1'], # max_forc x 1\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*0.66,\n", " # seq_len1=60,\n", " # )['y1'], # max_forc x 0.66\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*0.33,\n", " # seq_len1=60,\n", " # )['y1'], # max_forc x 0.33\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*1.50,\n", " # seq_len1=60,\n", " # )['y1'], # max_forc x 1.5\n", " # # =====================================\n", " # # scale mid peak: 2\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*1.0,\n", " # 
seq_len1=protein_df['seq_len'][2],\n", " # )['y1'], # peak_mid x 1\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*0.66,\n", " # seq_len1=60,\n", " # )['y1'], # max_forc x 0.66\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*0.33,\n", " # seq_len1=60,\n", " # )['y1'], # max_forc x 0.33\n", "\n", " # UtilityPack.interpolate_and_resample_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*1.50,\n", " # seq_len1=60,\n", " # )['y1'], # max_forc x 1.50\n", " # # ======================================\n", " # # mix two: max and min\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*1.,\n", " # y1=protein_df['sample_FORCE_data'][pick_id_1]*0.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 1 + min x 0\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*2./3.,\n", " # y1=protein_df['sample_FORCE_data'][pick_id_1]*1./3.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 2/3 + min x 1/3\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*1./2.,\n", " # y1=protein_df['sample_FORCE_data'][pick_id_1]*1./2.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 1/2 + min x 1/2\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*1./3.,\n", " # y1=protein_df['sample_FORCE_data'][pick_id_1]*2./3.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 1/3 + min x 2/3\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][pick_id_0]*0.,\n", " # y1=protein_df['sample_FORCE_data'][pick_id_1]*1.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 0. 
+ min x 1.\n", " # # =======================================\n", " # # mix another two: middle peak 2 + slope 174\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*1.,\n", " # y1=protein_df['sample_FORCE_data'][174]*0.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 1 + min x 0\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*2./3.,\n", " # y1=protein_df['sample_FORCE_data'][174]*1./3.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 2/3 + min x 1/3\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*1./2.,\n", " # y1=protein_df['sample_FORCE_data'][174]*1./2.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 1/2 + min x 1/2\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*1./3.,\n", " # y1=protein_df['sample_FORCE_data'][174]*2./3.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 1/3 + min x 2/3\n", "\n", " # UtilityPack.mix_two_ForcPath(\n", " # y0=protein_df['sample_FORCE_data'][2]*0.,\n", " # y1=protein_df['sample_FORCE_data'][174]*1.,\n", " # seq_len2=60,\n", " # )['y2'], # max x 0. 
+ min x 1.\n", "\n", " # ]\n", "\n", " elif CKeys['Problem_ID']==3:\n", " de_novo_test_condition_list = [\n", " [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],\n", " [0, 0.7, 0.07, 0.1, 0.01, 0.02, 0.01, 0.11],\n", " ]\n", " elif CKeys['Problem_ID']==4:\n", " # normalized (Fmax,Toughness)\n", " de_novo_test_condition_list = [\n", " [0.8, 0.8],\n", " [0.8, 0.2],\n", " ]\n", "\n", " elif CKeys['Problem_ID']==5:\n", " de_novo_test_condition_list = [\n", " ['~~~HHHHHHHHHHHHHHH~~'],\n", " ['~~EEESSTTS~SEEEEEEEEE~SBS~EEEEEE~~'],\n", " ]\n", "\n", "\n", "\n", "\n", "\n", "if CKeys['Working_Mode']==2:\n", " #\n", " if TestKeys['Sample_On_DeNovo']:\n", "\n", "\n", " if CKeys['Problem_ID']==2:\n", " TrainerPack.sample_sequence_omegafold_ModelB (\n", " working_model_B,\n", " x_data=NewInput_List,\n", " flag=last_epoch, # flag=\"DeNovo\", # ,\n", " cond_scales=1.,\n", " foldproteins=True, # foldproteins,\n", " # ++++++++++\n", " ynormfac=DataKeys['ynormfac'],\n", " train_unet_number=1, # train_unet_number,\n", " tokenizer_X=tokenizer_X,\n", " Xnormfac=DataKeys['Xnormfac'],\n", " max_length=DataKeys['max_AA_seq_len'], # max_length,\n", " prefix=PKeys['prefix'], # prefix,\n", " tokenizer_y=tokenizer_y,\n", " # ++\n", " CKeys=CKeys,\n", " sample_dir=TestKeys['test_dir'], # sample_dir,\n", " steps=last_step,\n", " e=last_epoch,\n", " IF_showfig= True, # CKeys['SlientRun']!=1,\n", " )\n", "\n", " elif CKeys['Problem_ID']==3:\n", " TrainerPack.sample_sequence_omegafold_ModelA (\n", " # # ----------------------------------------------\n", " # model,\n", " # X=[[0, 0.7, 0.07, 0.1, 0.01, 0.02, 0.01, 0.11]],\n", " # foldproteins=foldproteins,\n", " # flag=steps,cond_scales=1.,\n", " # ++++++++++++++++++++++++++++++++++++++++++++++\n", " working_model,\n", " X=test_condition_list, # [[0.92, 0., 0.04, 0.04, 0., 0., 0., 0., ]], # from text conditioning X\n", " flag=last_epoch, # e+start_ep, # steps, # 0,\n", " cond_scales=1., # cond_scales, # 1.,\n", " foldproteins=True, # False,\n", " 
X_string=None, # from text conditioning X_string\n", " x_data=None, # from image conditioning x_data\n", " skip_steps=0,\n", " inpaint_images=None, # in formation Y data\n", " inpaint_masks = None,\n", " inpaint_resample_times = None,\n", " init_images = None,\n", " num_cycle=16, # for omegafolding\n", " calc_error=True, # False, # for check on folded results, not used for every case\n", " # ++++++++++++++++++++++++++\n", " # tokenizers\n", " tokenizer_X_forImageCondi=None, # for x_data\n", " Xnormfac_forImageCondi=1.,\n", " tokenizer_X_forTextCondi=None, # for X if NEEDED only\n", " Xnormfac_forTextCondi=DataKeys['Xnormfac'], # 1.,\n", " tokenizer_y=tokenizer_y, # None, # for output Y\n", " ynormfac=DataKeys['ynormfac'], # ynormfac,\n", " # length\n", " train_unet_number=1,\n", " max_length_Y=DataKeys['max_AA_seq_len'], # max_length_Y, # for Y, X_forImageCondi\n", " max_text_len=DataKeys['max_text_len'], # max_text_len_X, # for X_forTextCondi\n", " # other info\n", " steps=last_step, # steps, # None,\n", " e=last_epoch, # e, # None,\n", " sample_dir=TestKeys['test_dir'], # sample_dir, # None,\n", " prefix=PKeys['prefix'], # prefix, # None,\n", " IF_showfig= True, # CKeys['SlientRun']!=1, # True,\n", " CKeys=CKeys,\n", " # TBA to Model B\n", " normalize_X_cond_to_one=False,\n", " )\n", " #\n", " elif CKeys['Problem_ID']==4:\n", " TrainerPack.sample_sequence_omegafold_ModelA (\n", " # # ----------------------------------------------\n", " # model,\n", " # X=[[0, 0.7, 0.07, 0.1, 0.01, 0.02, 0.01, 0.11]],\n", " # foldproteins=foldproteins,\n", " # flag=steps,cond_scales=1.,\n", " # ++++++++++++++++++++++++++++++++++++++++++++++\n", " working_model,\n", " X=test_condition_list, # [[0.92, 0., 0.04, 0.04, 0., 0., 0., 0., ]], # from text conditioning X\n", " flag=last_epoch, # e+start_ep, # steps, # 0,\n", " cond_scales=1., # cond_scales, # 1.,\n", " foldproteins=True, # False,\n", " X_string=None, # from text conditioning X_string\n", " x_data=None, # from image 
conditioning x_data\n", " skip_steps=0,\n", " inpaint_images=None, # in formation Y data\n", " inpaint_masks = None,\n", " inpaint_resample_times = None,\n", " init_images = None,\n", " num_cycle=16, # for omegafolding\n", " # only for ModelA-SecStr:ProblemID:4\n", " calc_error=False, # True, # False, # for check on folded results, not used for every case\n", " # ++++++++++++++++++++++++++\n", " # tokenizers\n", " tokenizer_X_forImageCondi=None, # for x_data\n", " Xnormfac_forImageCondi=1.,\n", " tokenizer_X_forTextCondi=None, # for X if NEEDED only\n", " Xnormfac_forTextCondi=DataKeys['Xnormfac'], # 1.,\n", " tokenizer_y=tokenizer_y, # None, # for output Y\n", " ynormfac=DataKeys['ynormfac'], # ynormfac,\n", " # length\n", " train_unet_number=1,\n", " max_length_Y=DataKeys['max_AA_seq_len'], # max_length_Y, # for Y, X_forImageCondi\n", " max_text_len=DataKeys['max_text_len'], # max_text_len_X, # for X_forTextCondi\n", " # other info\n", " steps=last_step, # steps, # None,\n", " e=last_epoch, # e, # None,\n", " sample_dir=TestKeys['test_dir'], # sample_dir, # None,\n", " prefix=PKeys['prefix'], # prefix, # None,\n", " IF_showfig= True, # CKeys['SlientRun']!=1, # True,\n", " CKeys=CKeys,\n", " # TBA to Model B\n", " normalize_X_cond_to_one=False,\n", " )\n", " # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n", " # Model B + SecStr\n", " # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " elif CKeys['Problem_ID']==5:\n", " DeNovoSam_pdbs, DeNovoSam_fasta = \\\n", " TrainerPack.sample_sequence_omegafold_pLM_ModelB (\n", " working_model,\n", " x_data=NewInput_List,\n", " flag=last_epoch, # flag=\"DeNovo\", # ,\n", " cond_scales=1.,\n", " foldproteins=True, # foldproteins,\n", " # ++++++++++\n", " ynormfac=DataKeys['ynormfac'],\n", " train_unet_number=1, # train_unet_number,\n", " tokenizer_X=tokenizer_X,\n", " Xnormfac=DataKeys['Xnormfac'],\n", " max_length=DataKeys['max_AA_seq_len'], # max_length,\n", " prefix=PKeys['prefix'], # prefix,\n", " 
tokenizer_y=tokenizer_y,\n", " # ++\n", " CKeys=CKeys,\n", " sample_dir=TestKeys['test_dir'], # sample_dir,\n", " steps=last_step,\n", " e=last_epoch,\n", " IF_showfig= True, # CKeys['SlientRun']!=1,\n", " # ++\n", " pLM_Model=pLM_Model,\n", " pLM_Model_Name=pLM_Model_Name,\n", " image_channels=DataKeys['image_channels'],\n", " pLM_alphabet=esm_alphabet,\n", " )\n", " #\n", " elif CKeys['Problem_ID']==6:\n", " DeNovoSam_pdbs, DeNovoSam_fasta = \\\n", " TrainerPack.sample_sequence_omegafold_pLM_ModelB_For_ForcPath (\n", " working_model,\n", " x_data=de_novo_test_condition_list[:2],\n", " flag=last_epoch, # flag=\"DeNovo\", # ,\n", " cond_scales=[1., 2.],\n", " foldproteins=True, # foldproteins,\n", " # ++++++++++\n", " ynormfac=DataKeys['ynormfac'],\n", " train_unet_number=1, # train_unet_number,\n", " tokenizer_X=tokenizer_X,\n", " Xnormfac=DataKeys['Xnormfac'],\n", " max_length=DataKeys['max_AA_seq_len'], # max_length,\n", " prefix=PKeys['prefix'], # prefix,\n", " tokenizer_y=tokenizer_y,\n", " # ++\n", " CKeys=CKeys,\n", " sample_dir=TestKeys['test_dir'], # sample_dir,\n", " steps=last_step,\n", " e=last_epoch,\n", " IF_showfig= True, # CKeys['SlientRun']!=1,\n", " # ++\n", " pLM_Model=pLM_Model,\n", " pLM_Model_Name=pLM_Model_Name,\n", " image_channels=DataKeys['image_channels'],\n", " pLM_alphabet=esm_alphabet,\n", " )\n", "\n", " # DeNovoSam_pdbs, DeNovoSam_fasta = \\\n", " # TrainerPack.sample_sequence_omegafold_pLM_ModelB (\n", " # working_model,\n", " # x_data=NewInput_List,\n", " # flag=last_epoch, # flag=\"DeNovo\", # ,\n", " # cond_scales=1.,\n", " # foldproteins=True, # foldproteins,\n", " # # ++++++++++\n", " # ynormfac=DataKeys['ynormfac'],\n", " # train_unet_number=1, # train_unet_number,\n", " # tokenizer_X=tokenizer_X,\n", " # Xnormfac=DataKeys['Xnormfac'],\n", " # max_length=DataKeys['max_AA_seq_len'], # max_length,\n", " # prefix=PKeys['prefix'], # prefix,\n", " # tokenizer_y=tokenizer_y,\n", " # # ++\n", " # CKeys=CKeys,\n", " # 
sample_dir=TestKeys['test_dir'], # sample_dir,\n", " # steps=last_step,\n", " # e=last_epoch,\n", " # IF_showfig= True, # CKeys['SlientRun']!=1,\n", " # # ++\n", " # pLM_Model=pLM_Model,\n", " # pLM_Model_Name=pLM_Model_Name,\n", " # image_channels=DataKeys['image_channels'],\n", " # pLM_alphabet=esm_alphabet,\n", " # )\n", " #\n", " elif CKeys['Problem_ID']==7:\n", " DeNovoSam_pdbs, fasta_file_list=\\\n", " TrainerPack.sample_sequence_omegafold_pLM_ModelA (\n", " working_model,\n", " X=NewInput_List, # test_condition_list, # [[0.92, 0., 0.04, 0.04, 0., 0., 0., 0., ]], # from text conditioning X\n", " flag=last_epoch, # e+start_ep, # steps, # 0,\n", " cond_scales=1.,\n", " foldproteins=True, # False,\n", " X_string=None, # from text conditioning X_string\n", " x_data=None, # from image conditioning x_data\n", " skip_steps=0,\n", " inpaint_images=None, # in formation Y data\n", " inpaint_masks = None,\n", " inpaint_resample_times = None,\n", " init_images = None,\n", " num_cycle=16, # for omegafolding\n", " calc_error=True, # False, # for check on folded results, not used for every case\n", " # ++++++++++++++++++++++++++\n", " # tokenizers\n", " tokenizer_X_forImageCondi=None, # for x_data\n", " Xnormfac_forImageCondi=1.,\n", " tokenizer_X_forTextCondi=None, # for X if NEEDED only\n", " Xnormfac_forTextCondi=1.,\n", " tokenizer_y=tokenizer_y, # None, # for output Y\n", " ynormfac=DataKeys['ynormfac'], # ynormfac,\n", " # length\n", " train_unet_number=1,\n", " max_length_Y=DataKeys['max_AA_seq_len'], # max_length_Y, # for Y, X_forImageCondi\n", " max_text_len=DataKeys['max_text_len'], # max_text_len_X, # for X_forTextCondi\n", " # other info\n", " steps=last_step, # None,\n", " e=last_epoch, # None,\n", " sample_dir=TestKeys['test_dir'], # None,\n", " prefix=PKeys['prefix'], # None,\n", " IF_showfig= True, # CKeys['SlientRun']!=1, # True,\n", " CKeys=CKeys,\n", " # TBA to Model B\n", " normalize_X_cond_to_one=False,\n", " # ++ for pLM\n", " pLM_Model=pLM_Model,\n", 
" pLM_Model_Name=pLM_Model_Name,\n", " image_channels=DataKeys['image_channels'], # image_channels,\n", " pLM_alphabet=esm_alphabet,\n", " )\n", "\n", " elif CKeys['Problem_ID']==8:\n", " DeNovoSam_pdbs, fasta_file_list=\\\n", " TrainerPack.sample_sequence_omegafold_pLM_ModelA (\n", " working_model,\n", " X=NewInput_List, # test_condition_list, # [[0.92, 0., 0.04, 0.04, 0., 0., 0., 0., ]], # from text conditioning X\n", " flag=last_epoch, # e+start_ep, # steps, # 0,\n", " cond_scales=1.,\n", " foldproteins=True, # False,\n", " X_string=None, # from text conditioning X_string\n", " x_data=None, # from image conditioning x_data\n", " skip_steps=0,\n", " inpaint_images=None, # in formation Y data\n", " inpaint_masks = None,\n", " inpaint_resample_times = None,\n", " init_images = None,\n", " num_cycle=16, # for omegafolding\n", " calc_error=True, # False, # for check on folded results, not used for every case\n", " # ++++++++++++++++++++++++++\n", " # tokenizers\n", " tokenizer_X_forImageCondi=None, # for x_data\n", " Xnormfac_forImageCondi=1.,\n", " tokenizer_X_forTextCondi=None, # for X if NEEDED only\n", " Xnormfac_forTextCondi=1.,\n", " tokenizer_y=tokenizer_y, # None, # for output Y\n", " ynormfac=DataKeys['ynormfac'], # ynormfac,\n", " # length\n", " train_unet_number=1,\n", " max_length_Y=DataKeys['max_AA_seq_len'], # max_length_Y, # for Y, X_forImageCondi\n", " max_text_len=DataKeys['max_text_len'], # max_text_len_X, # for X_forTextCondi\n", " # other info\n", " steps=last_step, # None,\n", " e=last_epoch, # None,\n", " sample_dir=TestKeys['test_dir'], # None,\n", " prefix=PKeys['prefix'], # None,\n", " IF_showfig= CKeys['SlientRun']!=1, # True,\n", " CKeys=CKeys,\n", " # TBA to Model B\n", " normalize_X_cond_to_one=False,\n", " # ++ for pLM\n", " pLM_Model=pLM_Model,\n", " pLM_Model_Name=pLM_Model_Name,\n", " image_channels=DataKeys['image_channels'], # image_channels,\n", " pLM_alphabet=esm_alphabet,\n", " )\n", "\n", " elif CKeys['Problem_ID']==11:\n", " # 
plot the input\n",
    "            print(\"On the inputs:\")\n",
    "            # Draw every input curve on one axes. Size options (figsize=, dpi=) must be\n",
    "            # passed to plt.subplots(); a separate plt.figure() call only creates an\n",
    "            # extra, empty figure that the curves never use.\n",
    "            fig, ax0 = plt.subplots()\n",
    "            for ii in range(Sample_for_Num_inputs):\n",
    "                ax0.plot(\n",
    "                    de_novo_test_condition_list[ii],\n",
    "                    label='Input_'+str(ii+0)+'_to_be_inferred'\n",
    "                )\n",
    "\n",
    "            ax0.legend()\n",
    "            ax0.set_xlabel('Normalized pulling gap')\n",
    "            ax0.set_ylabel('Pulling force (kcal/(mol*Å))')\n",
    "            plt.show()\n",
    "            # close exactly the figure created above so it does not stay open\n",
    "            plt.close(fig)\n",
    "            # conduct inferring: sample designs for the first Sample_for_Num_inputs input curves\n",
    "            # NOTE(review): flag/steps/e are fixed at 100 in this branch, while the other\n",
    "            # Problem_ID branches pass last_epoch/last_step - confirm this is intended.\n",
    "            print(\"Now, make design based on the inputs...\")\n",
    "            print(\"\\n\\n\")\n",
    "            DeNovoSam_pdbs, DeNovoSam_fasta = \\\n",
    "            TrainerPack.sample_sequence_omegafold_pLM_ModelB_For_ForcPath (\n",
    "                working_model,\n",
    "                x_data=de_novo_test_condition_list[:Sample_for_Num_inputs], # [:2],\n",
    "                flag=100, #last_epoch, # flag=\"DeNovo\", # ,\n",
    "                cond_scales=[1.],\n",
    "                foldproteins=True, # foldproteins,\n",
    "                # ++++++++++\n",
    "                ynormfac=DataKeys['ynormfac'],\n",
    "                train_unet_number=1, # train_unet_number,\n",
    "                tokenizer_X=tokenizer_X,\n",
    "                Xnormfac=DataKeys['Xnormfac'],\n",
    "                max_length=DataKeys['max_AA_seq_len'], # max_length,\n",
    "                prefix=PKeys['prefix'], # prefix,\n",
    "                tokenizer_y=tokenizer_y,\n",
    "                # ++\n",
    "                CKeys=CKeys,\n",
    "                sample_dir=TestKeys['test_dir'], # sample_dir,\n",
    "                steps=100, # last_step,\n",
    "                e=100, # last_epoch,\n",
    "                IF_showfig= True, # CKeys['SlientRun']!=1,\n",
    "                # ++\n",
    "                pLM_Model=pLM_Model,\n",
    "                pLM_Model_Name=pLM_Model_Name,\n",
    "                image_channels=DataKeys['image_channels'],\n",
    "                pLM_alphabet=esm_alphabet,\n",
    "            )\n",
    "\n"
 ] }, { "cell_type": "code", "execution_count": 13, "id": "e0KlqgH44W3W", "metadata": { "id": "e0KlqgH44W3W" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 13, "id": "tuGOEsVx4W6U", "metadata": { "id": "tuGOEsVx4W6U" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "A100", "machine_shape": "hm", "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language":
"python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "f6ec7f703621416fb631812d2f8def00": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_68b23bcc31a8453890de44568dfeb1ee", "IPY_MODEL_ef0f7e87f6334939bea151726af03b83", "IPY_MODEL_5d5f291eb6644e6d95d07ba755b56870" ], "layout": "IPY_MODEL_a37c10d569254e958453c189f15cdf9c" } }, "68b23bcc31a8453890de44568dfeb1ee": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e37c525959bc4dc0ad96b94d83153319", "placeholder": "​", "style": "IPY_MODEL_2ab06057ee1e4c11a487f0d1b289373c", "value": "" } }, "ef0f7e87f6334939bea151726af03b83": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", 
"bar_style": "danger", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_595600bfe70f4c0bb5fb200217116674", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e2893aae1bb7495e96c770bcdf3dfa4d", "value": 0 } }, "5d5f291eb6644e6d95d07ba755b56870": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b7dfef148b8645ea9b2c70103a81b6d0", "placeholder": "​", "style": "IPY_MODEL_d908b047df4f4ea59d37355d673319bc", "value": " 0/? [00:09<?, ?it/s]" } }, "a37c10d569254e958453c189f15cdf9c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, 
"width": null } }, "e37c525959bc4dc0ad96b94d83153319": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2ab06057ee1e4c11a487f0d1b289373c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "595600bfe70f4c0bb5fb200217116674": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, 
"align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "e2893aae1bb7495e96c770bcdf3dfa4d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b7dfef148b8645ea9b2c70103a81b6d0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d908b047df4f4ea59d37355d673319bc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f833b0f88df14acf8432b8a7098d029f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_bf0a9c22ce974f2eb796c2275e7dec12", "IPY_MODEL_1521cf912d2f4207866034891baad8d9", "IPY_MODEL_51c23b8303c9450f8cfe8c8015ccfa9d" ], "layout": "IPY_MODEL_9901ee5e2d554071a43888b93ddf0073" } }, "bf0a9c22ce974f2eb796c2275e7dec12": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a1cfeea9cbab45eca372c506825e25f4", "placeholder": "​", "style": "IPY_MODEL_af16e614a3104abfa5179e6b3e066397", 
"value": "sampling time step: 100%" } }, "1521cf912d2f4207866034891baad8d9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0f9a7631770a4c64845d95effc1c38ba", "max": 96, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b164b59990084bed94eca230208166ca", "value": 96 } }, "51c23b8303c9450f8cfe8c8015ccfa9d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7b5706c2f1a84fabbb828a1bacb36b3d", "placeholder": "​", "style": "IPY_MODEL_d04ff74e247443bc8d8127e3f2161c0c", "value": " 96/96 [00:09<00:00, 12.45it/s]" } }, "9901ee5e2d554071a43888b93ddf0073": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, 
"grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a1cfeea9cbab45eca372c506825e25f4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "af16e614a3104abfa5179e6b3e066397": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0f9a7631770a4c64845d95effc1c38ba": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b164b59990084bed94eca230208166ca": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "7b5706c2f1a84fabbb828a1bacb36b3d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d04ff74e247443bc8d8127e3f2161c0c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 5 }