| { |
| "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])", |
| "network": { |
| "_target_": "torch.nn.parallel.DistributedDataParallel", |
| "module": "$@network_def.to(@device)", |
| "device_ids": [ |
| "@device" |
| ], |
| "find_unused_parameters": true |
| }, |
| "train#sampler": { |
| "_target_": "DistributedSampler", |
| "dataset": "@train#dataset", |
| "even_divisible": true, |
| "shuffle": true |
| }, |
| "train#dataloader#sampler": "@train#sampler", |
| "train#dataloader#shuffle": false, |
| "train#trainer#train_handlers": "$@train#handlers[: -2 if dist.get_rank() > 0 else None]", |
| "validate#sampler": { |
| "_target_": "DistributedSampler", |
| "dataset": "@validate#dataset", |
| "even_divisible": false, |
| "shuffle": false |
| }, |
| "validate#dataloader#sampler": "@validate#sampler", |
| "validate#evaluator#val_handlers": "$None if dist.get_rank() > 0 else @validate#handlers", |
| "initialize": [ |
| "$import torch.distributed as dist", |
| "$dist.is_initialized() or dist.init_process_group(backend='nccl')", |
| "$torch.cuda.set_device(@device)", |
| "$monai.utils.set_determinism(seed=123)", |
| "$import logging", |
| "$@train#trainer.logger.setLevel(logging.WARNING if dist.get_rank() > 0 else logging.INFO)", |
| "$@validate#evaluator.logger.setLevel(logging.WARNING if dist.get_rank() > 0 else logging.INFO)" |
| ], |
| "run": [ |
| "$@train#trainer.run()" |
| ], |
| "finalize": [ |
| "$dist.is_initialized() and dist.destroy_process_group()" |
| ] |
| } |
|
|