#!/usr/bin/env bash
# Launch train.py under torchrun with NUM_PROC worker processes per node.
#
# Usage: <this script> NUM_PROC [train.py arguments...]
#   NUM_PROC   forwarded verbatim to torchrun's --nproc_per_node
#   remaining  arguments are passed through to train.py unchanged
#
# train.py is resolved relative to the current working directory.
# The exit status is that of torchrun (or 2 on a usage error).

# Fail early with a usage message instead of handing torchrun an empty
# --nproc_per_node= value when the first argument is missing.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s NUM_PROC [train.py args...]\n' "${0##*/}" >&2
  exit 2
fi

NUM_PROC=$1
shift

# the localhost fixes (--rdzv_backend c10d --rdzv_endpoint localhost:0) are
# from https://github.com/pytorch/pytorch/issues/73320
torchrun \
  --rdzv_backend c10d \
  --rdzv_endpoint localhost:0 \
  --nproc_per_node="$NUM_PROC" \
  train.py "$@"