Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

#548 Split and rename the modules from super_gradients.common.environment

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:hotfix/SG-000-refactor_environment_package
@@ -24,7 +24,7 @@ from super_gradients.common.factories.callbacks_factory import CallbacksFactory
 from super_gradients.common.data_types.enum import MultiGPUMode, StrictLoad, EvaluationType
 from super_gradients.training.models.all_architectures import ARCHITECTURES
 from super_gradients.common.decorators.factory_decorator import resolve_param
-from super_gradients.common.environment import env_helpers
+from super_gradients.common.environment import ddp_utils
 from super_gradients.common.abstractions.abstract_logger import get_logger, mute_current_process
 from super_gradients.common.factories.list_factory import ListFactory
 from super_gradients.common.factories.losses_factory import LossesFactory
@@ -81,7 +81,6 @@ from super_gradients.training.utils.callbacks import (
     ContextSgMethods,
     LRCallbackBase,
 )
-from super_gradients.common.environment import environment_config
 from super_gradients.training.utils import HpmStruct
 from super_gradients.training.utils.hydra_utils import load_experiment_cfg, add_params_to_cfg
 from omegaconf import OmegaConf
@@ -1433,7 +1432,7 @@ class Trainer:
                         logger.warning("\n[WARNING] - Tried running on multiple GPU but only a single GPU is available\n")
                 else:
                     if requested_multi_gpu == MultiGPUMode.AUTO:
-                        if env_helpers.is_distributed():
+                        if ddp_utils.is_distributed():
                             requested_multi_gpu = MultiGPUMode.DISTRIBUTED_DATA_PARALLEL
                         else:
                             requested_multi_gpu = MultiGPUMode.DATA_PARALLEL
@@ -1456,7 +1455,7 @@ class Trainer:
         batch you specify times the number of GPUs. In the literature there are several "best practices" to set
         learning rates and schedules for large batch sizes.
         """
-        local_rank = environment_config.DDP_LOCAL_RANK
+        local_rank = ddp_utils.DDP_LOCAL_RANK
         if local_rank > 0:
             mute_current_process()

Discard
@@ -11,9 +11,10 @@ from torch.distributed.elastic.multiprocessing.errors import record
 from torch.distributed.launcher.api import LaunchConfig, elastic_launch

 from super_gradients.common.data_types.enum import MultiGPUMode
-from super_gradients.common.environment.env_helpers import find_free_port, is_distributed
+from super_gradients.common.environment.argparse_utils import EXTRA_ARGS
+from super_gradients.common.environment.ddp_utils import find_free_port, is_distributed
 from super_gradients.common.abstractions.abstract_logger import get_logger
-from super_gradients.common.environment import environment_config
+

 logger = get_logger(__name__)

@@ -238,7 +239,7 @@ def restart_script_with_ddp(num_gpus: int = None):
         metrics_cfg={},
     )

-    elastic_launch(config=config, entrypoint=sys.executable)(*sys.argv, *environment_config.EXTRA_ARGS)
+    elastic_launch(config=config, entrypoint=sys.executable)(*sys.argv, *EXTRA_ARGS)

     # The code below should actually never be reached as the process will be in a loop inside elastic_launch until any subprocess crashes.
     sys.exit("Main process finished")
Discard