delete first version of dist multitask example

Browse files

Files changed (1) hide show

examples/distrbuted_multitask_cell_classification.ipynb +0 -149

examples/distrbuted_multitask_cell_classification.ipynb DELETED Viewed

@@ -1,149 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b3266a7b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import torch\n",
-    "from geneformer import MTLClassifier"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3e12ac9f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Define paths\n",
-    "pretrained_path = \"/path/to/pretrained/Geneformer/model\" \n",
-    "# input data is tokenized rank value encodings generated by Geneformer tokenizer (see tokenizing_scRNAseq_data.ipynb)\n",
-    "train_path = \"/path/to/train/data.dataset\"\n",
-    "val_path = \"/path/to/val/data.dataset\"\n",
-    "test_path = \"/path/to/test/data.dataset\"\n",
-    "results_dir = \"/path/to/results/directory\"\n",
-    "model_save_path = \"/path/to/model/save/path\"\n",
-    "tensorboard_log_dir = \"/path/to/tensorboard/log/dir\"\n",
-    "\n",
-    "# Define tasks and hyperparameters\n",
-    "# task_columns should be a list of column names from your dataset\n",
-    "# Each column represents a specific classification task (e.g. cell type, disease state)\n",
-    "task_columns = [\"cell_type\", \"disease_state\"]  # Example task columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c9bd7562",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check GPU environment\n",
-    "num_gpus = torch.cuda.device_count()\n",
-    "use_distributed = num_gpus > 1\n",
-    "print(f\"Number of GPUs detected: {num_gpus}\")\n",
-    "print(f\"Using distributed training: {use_distributed}\")\n",
-    "\n",
-    "# Set environment variables for distributed training when multiple GPUs are available\n",
-    "if use_distributed:\n",
-    "    os.environ[\"MASTER_ADDR\"] = \"localhost\"  # hostname\n",
-    "    os.environ[\"MASTER_PORT\"] = \"12355\"      # Choose an available port\n",
-    "    print(\"Distributed environment variables set.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b6ff3618",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Define Hyperparameters for Optimization\n",
-    "hyperparameters = {\n",
-    "    \"learning_rate\": {\"type\": \"float\", \"low\": 1e-5, \"high\": 1e-3, \"log\": True},\n",
-    "    \"warmup_ratio\": {\"type\": \"float\", \"low\": 0.005, \"high\": 0.01},\n",
-    "    \"weight_decay\": {\"type\": \"float\", \"low\": 0.01, \"high\": 0.1},\n",
-    "    \"dropout_rate\": {\"type\": \"float\", \"low\": 0.0, \"high\": 0.7},\n",
-    "    \"lr_scheduler_type\": {\"type\": \"categorical\", \"choices\": [\"cosine\"]},\n",
-    "    \"task_weights\": {\"type\": \"float\", \"low\": 0.1, \"high\": 2.0},\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f665c5a7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mc = MTLClassifier(\n",
-    "    task_columns=task_columns,  # Our defined classification tasks\n",
-    "    study_name=\"MTLClassifier_distributed\",\n",
-    "    pretrained_path=pretrained_path,\n",
-    "    train_path=train_path,\n",
-    "    val_path=val_path,\n",
-    "    test_path=test_path,\n",
-    "    model_save_path=model_save_path,\n",
-    "    results_dir=results_dir,\n",
-    "    tensorboard_log_dir=tensorboard_log_dir,\n",
-    "    hyperparameters=hyperparameters,\n",
-    "    # Distributed training parameters\n",
-    "    distributed_training=use_distributed,  # Enable distributed training if multiple GPUs available\n",
-    "    master_addr=\"localhost\" if use_distributed else None,\n",
-    "    master_port=\"12355\" if use_distributed else None,\n",
-    "    # Other training parameters\n",
-    "    n_trials=15,  # Number of trials for hyperparameter optimization\n",
-    "    epochs=1,     # Number of training epochs (1 suggested to prevent overfitting)\n",
-    "    batch_size=8, # Adjust based on available GPU memory\n",
-    "    gradient_accumulation_steps=4,  # Accumulate gradients over multiple steps\n",
-    "    gradient_clipping=True,         # Enable gradient clipping for stability\n",
-    "    max_grad_norm=1.0,              # Set maximum gradient norm\n",
-    "    seed=42\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f69f7b6a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Run Hyperparameter Optimization with Distributed Training\n",
-    "if __name__ == \"__main__\":\n",
-    "    # This guard is required for distributed training to prevent\n",
-    "    # infinite subprocess spawning when using torch.multiprocessing\n",
-    "    mc.run_optuna_study()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3affd5dd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Evaluate the Model on Test Data\n",
-    "if __name__ == \"__main__\":\n",
-    "    mc.load_and_evaluate_test_model()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "bio",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}