#309 Fix scale between rescaling batches

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:feature/SG-221-make_multiscale_keep_state
import unittest
import numpy as np
from super_gradients.training import SgModel
from super_gradients.training.metrics import Accuracy
from super_gradients.training.datasets import ClassificationTestDatasetInterface
from super_gradients.training.models import LeNet
from super_gradients.training.utils.callbacks import TestLRCallback, LRCallbackBase, Phase


class ExponentialWarmupLRCallback(LRCallbackBase):
    """
    LR scheduling callback for exponential warmup.
    LR grows exponentially from warmup_initial_lr to initial_lr.
    When warmup_initial_lr is None, the climb starts from 0.001.
    """

    def __init__(self, **kwargs):
        super().__init__(Phase.TRAIN_EPOCH_START, **kwargs)
        self.warmup_initial_lr = self.training_params.warmup_initial_lr or 0.001
        warmup_epochs = self.training_params.lr_warmup_epochs
        lr_start = self.warmup_initial_lr
        lr_end = self.initial_lr
        self.c1 = (lr_end - lr_start) / (np.exp(warmup_epochs) - 1.)
        self.c2 = (lr_start * np.exp(warmup_epochs) - lr_end) / (np.exp(warmup_epochs) - 1.)

    def perform_scheduling(self, context):
        self.lr = self.c1 * np.exp(context.epoch) + self.c2
        self.update_lr(context.optimizer, context.epoch, None)

    def is_lr_scheduling_enabled(self, context):
        return self.training_params.lr_warmup_epochs >= context.epoch
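

# The constants c1 and c2 above are the unique solution of the boundary conditions
# lr(0) = warmup_initial_lr and lr(lr_warmup_epochs) = initial_lr for the curve
# lr(e) = c1 * exp(e) + c2. A minimal standalone sketch of the same closed form
# (plain NumPy, independent of SgModel) that reproduces the expected_lrs used in
# test_custom_lr_warmup below:
def exponential_warmup_lr(epoch, warmup_initial_lr, initial_lr, warmup_epochs):
    c1 = (initial_lr - warmup_initial_lr) / (np.exp(warmup_epochs) - 1.)
    c2 = (warmup_initial_lr * np.exp(warmup_epochs) - initial_lr) / (np.exp(warmup_epochs) - 1.)
    return c1 * np.exp(epoch) + c2

# [exponential_warmup_lr(e, 0.1, 1.0, 3) for e in range(3)]
# -> [0.1, 0.18102751585334242, 0.40128313980266034]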


class LRWarmupTest(unittest.TestCase):
    def setUp(self) -> None:
        self.dataset_params = {"batch_size": 4}
        self.dataset = ClassificationTestDatasetInterface(dataset_params=self.dataset_params)
        self.arch_params = {'num_classes': 10}

    def test_lr_warmup(self):
        # Define Model
        net = LeNet()
        model = SgModel("lr_warmup_test", model_checkpoints_location='local')
        model.connect_dataset_interface(self.dataset)
        model.build_model(net, arch_params=self.arch_params)
        lrs = []
        phase_callbacks = [TestLRCallback(lr_placeholder=lrs)]
        train_params = {"max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, "lr_mode": "step",
                        "lr_warmup_epochs": 3, "initial_lr": 1, "loss": "cross_entropy", "optimizer": 'SGD',
                        "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
                        "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()],
                        "loss_logging_items_names": ["Loss"], "metric_to_watch": "Accuracy",
                        "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks,
                        "warmup_mode": "linear_step"}
        expected_lrs = [0.25, 0.5, 0.75, 1.0, 1.0]  # see the linear_step sketch after this class
        model.train(train_params)
        self.assertListEqual(lrs, expected_lrs)

    def test_lr_warmup_with_lr_scheduling(self):
        # Define Model
        net = LeNet()
        model = SgModel("lr_warmup_test", model_checkpoints_location='local')
        model.connect_dataset_interface(self.dataset)
        model.build_model(net, arch_params=self.arch_params)
        lrs = []
        phase_callbacks = [TestLRCallback(lr_placeholder=lrs)]
        train_params = {"max_epochs": 5, "cosine_final_lr_ratio": 0.2, "lr_mode": "cosine",
                        "lr_warmup_epochs": 3, "initial_lr": 1, "loss": "cross_entropy", "optimizer": 'SGD',
                        "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
                        "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()],
                        "loss_logging_items_names": ["Loss"], "metric_to_watch": "Accuracy",
                        "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks,
                        "warmup_mode": "linear_step"}
        expected_lrs = [0.25, 0.5, 0.75, 0.9236067977499791, 0.4763932022500211]
        model.train(train_params)
        # Although not seen here, the 4th epoch uses lr = 1, so this is the expected
        # list because we collect the LRs after the update.
        self.assertListEqual(lrs, expected_lrs)
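
    # The two post-warmup values above are consistent with a cosine interpolation
    # from initial_lr down to cosine_final_lr_ratio * initial_lr, sampled after the
    # last LR update of each epoch. This is an inference from the expected values,
    # not a quote of the library's formula:
    #   0.2 + 0.8 * (1 + cos(0.2 * pi)) / 2 ~= 0.9236067977499791
    #   0.2 + 0.8 * (1 + cos(0.6 * pi)) / 2 ~= 0.4763932022500211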

    def test_warmup_initial_lr(self):
        # Define Model
        net = LeNet()
        model = SgModel("test_warmup_initial_lr", model_checkpoints_location='local')
        model.connect_dataset_interface(self.dataset)
        model.build_model(net, arch_params=self.arch_params)
        lrs = []
        phase_callbacks = [TestLRCallback(lr_placeholder=lrs)]
        train_params = {"max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, "lr_mode": "step",
                        "lr_warmup_epochs": 3, "loss": "cross_entropy", "optimizer": 'SGD',
                        "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
                        "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()],
                        "loss_logging_items_names": ["Loss"], "metric_to_watch": "Accuracy",
                        "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks,
                        "warmup_mode": "linear_step", "initial_lr": 1, "warmup_initial_lr": 4.}
        expected_lrs = [4., 3., 2., 1., 1.]
        model.train(train_params)
        self.assertListEqual(lrs, expected_lrs)

    def test_custom_lr_warmup(self):
        # Define Model
        net = LeNet()
        model = SgModel("custom_lr_warmup_test", model_checkpoints_location='local')
        model.connect_dataset_interface(self.dataset)
        model.build_model(net, arch_params=self.arch_params)
        lrs = []
        phase_callbacks = [TestLRCallback(lr_placeholder=lrs)]
        train_params = {"max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, "lr_mode": "step",
                        "lr_warmup_epochs": 3, "loss": "cross_entropy", "optimizer": 'SGD',
                        "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
                        "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()],
                        "loss_logging_items_names": ["Loss"], "metric_to_watch": "Accuracy",
                        "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks,
                        "warmup_mode": ExponentialWarmupLRCallback, "initial_lr": 1., "warmup_initial_lr": 0.1}
        expected_lrs = [0.1, 0.18102751585334242, 0.40128313980266034, 1.0, 1.0]
        model.train(train_params)
        self.assertListEqual(lrs, expected_lrs)
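

# A sketch of the arithmetic behind the "linear_step" expectations in test_lr_warmup
# and test_warmup_initial_lr above. The default start value of
# initial_lr / (lr_warmup_epochs + 1) is an assumption that fits those numbers,
# not a quote of the library's code:
def linear_step_warmup_lr(epoch, initial_lr, warmup_epochs, warmup_initial_lr=None):
    start = warmup_initial_lr if warmup_initial_lr is not None else initial_lr / (warmup_epochs + 1)
    return start + (initial_lr - start) * epoch / warmup_epochs

# [linear_step_warmup_lr(e, 1, 3) for e in range(4)]      -> [0.25, 0.5, 0.75, 1.0]
# [linear_step_warmup_lr(e, 1, 3, 4.) for e in range(4)]  -> [4.0, 3.0, 2.0, 1.0]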


if __name__ == '__main__':
    unittest.main()