- resume: False # whether to continue training from ckpt with the same experiment name.
- resume_path: # Explicit checkpoint path (.pth file) to use to resume training.
- ckpt_name: ckpt_latest.pth # The checkpoint (.pth file) filename in CKPT_ROOT_DIR/EXPERIMENT_NAME/ to use when resume=True and resume_path=None
- lr_mode: # Learning rate scheduling policy, one of ['step','poly','cosine','function']
- lr_schedule_function: # Learning rate scheduling function to be used when `lr_mode` is 'function'.
- lr_warmup_epochs: 0 # number of epochs for learning rate warm up - see https://arxiv.org/pdf/1706.02677.pdf (Section 2.2).
- lr_cooldown_epochs: 0 # number of epochs to cool down the LR at the end of training, i.e. from the scheduler's point of view the last epoch is max_epochs - lr_cooldown_epochs (a schedule sketch illustrating warmup, cosine decay and cooldown follows this list)
- warmup_initial_lr: # Initial lr for linear_step. When none is given, initial_lr/(warmup_epochs+1) will be used.
- step_lr_update_freq: # (float) update frequency in epoch units for computing lr_updates when lr_mode=`step`.
- cosine_final_lr_ratio: 0.01 # final learning rate ratio (only relevant when `lr_mode`='cosine')
- warmup_mode: linear_step # learning rate warmup scheme, currently only 'linear_step' is supported
- lr_updates:
  - _target_: super_gradients.training.utils.utils.empty_list # A workaround to instantiate an empty list via _target_. If it were instantiated as "lr_updates: []", an error would be raised every time lr_updates is overridden with a numpy array.
- pre_prediction_callback: # callback modifying images and targets right before forward pass.
- optimizer: SGD # Optimization algorithm. One of ['Adam','SGD','RMSProp'], corresponding to the torch.optim optimizers (a usage sketch showing how these parameters are assembled and passed to the trainer follows this list)
- optimizer_params: {} # when `optimizer` is one of ['Adam','SGD','RMSProp'], it will be initialized with optimizer_params.
- load_opt_params: True # Whether to also load the optimizer's parameters when loading a model checkpoint
- zero_weight_decay_on_bias_and_bn: False # whether to exclude bias and batch-normalization parameters from weight decay (i.e., set their weight decay to zero)
- loss: # Loss function for training (str naming one of SuperGradients' built-in options, or a torch.nn.Module)
- criterion_params: {} # when `loss` is one of SuperGradients' built-in options, it will be initialized with criterion_params.
- ema: False # whether to use Model Exponential Moving Average
- ema_params: # parameters for the EMA model.
  - decay: 0.9999
  - beta: 15
  - exp_activation: True
- train_metrics_list: [] # Metrics to log during training. For more information on torchmetrics see https://torchmetrics.rtfd.io/en/latest/.
- valid_metrics_list: [] # Metrics to log during validation. For more information on torchmetrics see https://torchmetrics.rtfd.io/en/latest/
- metric_to_watch: Accuracy # the metric according to which the best model checkpoint is saved
- greater_metric_to_watch_is_better: True # When True, the best checkpoint is the one that maximizes metric_to_watch; when False, the one that minimizes it
- launch_tensorboard: False # Whether to launch a TensorBoard process.
- tensorboard_port: # port for tensorboard process
- tb_files_user_prompt: False # whether to prompt the user before deleting existing TensorBoard files
- save_tensorboard_to_s3: False # whether to upload TensorBoard files to S3
- precise_bn: False # Whether to use precise_bn calculation during the training.
- precise_bn_batch_size: # the effective batch size over which the batch-norm statistics are computed.
- silent_mode: False # Silences print-outs
- mixed_precision: False # Whether to use mixed precision or not.
- save_ckpt_epoch_list: [] # epoch indices at which a checkpoint will be saved automatically
- average_best_models: True # If set, a snapshot dictionary file and the average model will be saved
- dataset_statistics: False # add a dataset statistical analysis and sample images to tensorboard
- batch_accumulate: 1 # number of batches over which gradients are accumulated before every optimizer step (a gradient-accumulation sketch follows this list)
- run_validation_freq: 1 # The frequency at which validation is performed during training (validation runs every run_validation_freq epochs)
- save_model: True # Whether to save the model checkpoints
- seed: 42 # seed for reproducibility
- phase_callbacks: [] # list of callbacks to be applied at specific phases.
- log_installed_packages: True # when set, the list of all installed packages (and their versions) will be written to the tensorboard
- save_full_train_log: False # When set, a full training log (covering all super_gradients modules, including uncaught exceptions from any other module) will be saved
- clip_grad_norm: # Maximal L2 norm of the gradients; gradients whose norm exceeds this value will be clipped
- ckpt_best_name: ckpt_best.pth
- enable_qat: False # enables quantization-aware training
- qat_params:
  - start_epoch: 0 # int, first epoch to start QAT. Must be lower than `max_epochs`.
  - quant_modules_calib_method: percentile # str, one of [percentile, mse, entropy, max]. Statistics method for amax computation of the quantized modules.
  - per_channel_quant_modules: False # bool, whether quant modules should be per channel.
  - calibrate: True # bool, whether to perform calibration.
  - calibrated_model_path: # str, path to a calibrated checkpoint (default=None).
  - calib_data_loader: # torch.utils.data.DataLoader, data loader of the calibration dataset. When None, context.train_loader will be used (default=None).
  - num_calib_batches: 2 # int, number of batches to collect the statistics from.
  - percentile: 99.99 # float, percentile value to use when quant_modules_calib_method='percentile'. Ignored when other methods are used (default=99.99).
- sg_logger: base_sg_logger
- sg_logger_params:
  - tb_files_user_prompt: False # whether to prompt the user before deleting existing TensorBoard files
  - launch_tensorboard: False
  - tensorboard_port:
  - save_checkpoints_remote: False # upload checkpoint files to S3
  - save_tensorboard_remote: False # upload TensorBoard files to S3
  - save_logs_remote: False # upload log files to S3
- _convert_: all
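
Most of these defaults can be overridden per experiment by passing a `training_params` dictionary to the trainer (or by editing a copy of this file). The following is a minimal sketch, assuming the SuperGradients `Trainer.train(...)` API, a classification model, and the built-in `Accuracy`/`Top5` metrics; the model name, loss name and dataloaders are placeholders, and `max_epochs`/`initial_lr` are required keys that appear earlier in the defaults file than the excerpt above.

```python
# Minimal sketch, not the library's canonical example. Assumes the SuperGradients
# Trainer API and that the model, loss and metric names below exist in the
# installed version; dataloader construction is omitted.
from super_gradients import Trainer
from super_gradients.training import models
from super_gradients.training.metrics import Accuracy, Top5

trainer = Trainer(experiment_name="example_classification", ckpt_root_dir="./checkpoints")
model = models.get("resnet18", num_classes=10)  # placeholder architecture

# Only the overridden keys are listed; everything else falls back to the
# defaults documented above.
training_params = {
    "max_epochs": 100,
    "initial_lr": 0.1,
    "lr_mode": "cosine",
    "cosine_final_lr_ratio": 0.01,
    "lr_warmup_epochs": 3,
    "optimizer": "SGD",
    "optimizer_params": {"momentum": 0.9, "weight_decay": 1e-4},
    "zero_weight_decay_on_bias_and_bn": True,
    "loss": "cross_entropy",  # assumed built-in loss name
    "ema": True,
    "ema_params": {"decay": 0.9999, "beta": 15, "exp_activation": True},
    "train_metrics_list": [Accuracy(), Top5()],
    "valid_metrics_list": [Accuracy(), Top5()],
    "metric_to_watch": "Accuracy",
    "greater_metric_to_watch_is_better": True,
    "mixed_precision": False,
}

# train_loader / valid_loader are ordinary torch DataLoaders (not shown):
# trainer.train(model=model, training_params=training_params,
#               train_loader=train_loader, valid_loader=valid_loader)
```

When training from a Hydra recipe instead, these same keys typically sit under the recipe's `training_hyperparams` group and can be overridden there or on the command line.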
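The warmup, cosine and cooldown parameters combine into a single per-epoch schedule. The function below is a rough, self-contained illustration of that policy written from the parameter descriptions above; it is not SuperGradients' internal scheduler, so exact epoch boundaries and the warmup formula may differ from the library's implementation.

```python
import math
from typing import Optional

def illustrative_lr(epoch: int,
                    max_epochs: int = 100,
                    initial_lr: float = 0.1,
                    lr_warmup_epochs: int = 3,
                    warmup_initial_lr: Optional[float] = None,
                    cosine_final_lr_ratio: float = 0.01,
                    lr_cooldown_epochs: int = 0) -> float:
    """Warmup -> cosine decay -> cooldown, as a generic illustration only."""
    if warmup_initial_lr is None:
        # Documented default: initial_lr / (warmup_epochs + 1).
        warmup_initial_lr = initial_lr / (lr_warmup_epochs + 1)

    final_lr = initial_lr * cosine_final_lr_ratio
    last_scheduled_epoch = max_epochs - lr_cooldown_epochs  # scheduler's "last" epoch

    if epoch < lr_warmup_epochs:
        # 'linear_step'-style warmup: ramp from warmup_initial_lr up to initial_lr.
        t = epoch / lr_warmup_epochs
        return warmup_initial_lr + t * (initial_lr - warmup_initial_lr)
    if epoch >= last_scheduled_epoch:
        # Cooldown: hold the final LR for the last lr_cooldown_epochs epochs.
        return final_lr

    # Cosine decay from initial_lr down to initial_lr * cosine_final_lr_ratio.
    progress = (epoch - lr_warmup_epochs) / max(1, last_scheduled_epoch - lr_warmup_epochs)
    return final_lr + 0.5 * (initial_lr - final_lr) * (1.0 + math.cos(math.pi * progress))

if __name__ == "__main__":
    for e in (0, 3, 50, 95, 99):
        print(e, round(illustrative_lr(e, lr_cooldown_epochs=2), 5))
```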
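`batch_accumulate` is plain gradient accumulation: gradients from several consecutive batches are summed and the optimizer steps once per group, emulating a larger effective batch size. The loop below is a generic PyTorch illustration of the idea, written for this note rather than taken from SuperGradients' training loop.

```python
def train_one_epoch(model, loss_fn, optimizer, data_loader, batch_accumulate: int = 1):
    """Generic gradient-accumulation loop illustrating what batch_accumulate means."""
    model.train()
    optimizer.zero_grad()
    for i, (inputs, targets) in enumerate(data_loader):
        outputs = model(inputs)
        # Scale the per-batch loss so the summed gradient matches one large batch.
        loss = loss_fn(outputs, targets) / batch_accumulate
        loss.backward()  # gradients accumulate in .grad across batches
        if (i + 1) % batch_accumulate == 0:
            optimizer.step()
            optimizer.zero_grad()
```

For example, `batch_accumulate: 4` with a per-device batch size of 64 gives an effective batch size of 256 per optimizer step.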