@@ -24,7 +24,7 @@ from super_gradients.common.factories.callbacks_factory import CallbacksFactory
 from super_gradients.common.data_types.enum import MultiGPUMode, StrictLoad, EvaluationType
 from super_gradients.training.models.all_architectures import ARCHITECTURES
 from super_gradients.common.decorators.factory_decorator import resolve_param
-from super_gradients.common.environment import env_helpers
+from super_gradients.common.environment import ddp_utils
 from super_gradients.common.abstractions.abstract_logger import get_logger, mute_current_process
 from super_gradients.common.factories.list_factory import ListFactory
 from super_gradients.common.factories.losses_factory import LossesFactory
@@ -81,7 +81,6 @@ from super_gradients.training.utils.callbacks import (
     ContextSgMethods,
     LRCallbackBase,
 )
-from super_gradients.common.environment import environment_config
 from super_gradients.training.utils import HpmStruct
 from super_gradients.training.utils.hydra_utils import load_experiment_cfg, add_params_to_cfg
 from omegaconf import OmegaConf
@@ -1433,7 +1432,7 @@ class Trainer:
             logger.warning("\n[WARNING] - Tried running on multiple GPU but only a single GPU is available\n")
         else:
             if requested_multi_gpu == MultiGPUMode.AUTO:
-                if env_helpers.is_distributed():
+                if ddp_utils.is_distributed():
                     requested_multi_gpu = MultiGPUMode.DISTRIBUTED_DATA_PARALLEL
                 else:
                     requested_multi_gpu = MultiGPUMode.DATA_PARALLEL
@@ -1456,7 +1455,7 @@ class Trainer:
         batch you specify times the number of GPUs. In the literature there are several "best practices" to set
         learning rates and schedules for large batch sizes.
         """
-        local_rank = environment_config.DDP_LOCAL_RANK
+        local_rank = ddp_utils.DDP_LOCAL_RANK
         if local_rank > 0:
             mute_current_process()