# Source code for super_gradients.training.utils.optimizer_utils

```python
import torch.optim as optim
import torch.nn as nn
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn.modules.conv import _ConvNd

from super_gradients.common.abstractions.abstract_logger import get_logger
from super_gradients.common.factories.optimizers_type_factory import OptimizersTypeFactory
from super_gradients.training.params import DEFAULT_OPTIMIZER_PARAMS_SGD, DEFAULT_OPTIMIZER_PARAMS_ADAM, \
    DEFAULT_OPTIMIZER_PARAMS_RMSPROP, DEFAULT_OPTIMIZER_PARAMS_RMSPROPTF
from super_gradients.training.utils import get_param
from super_gradients.training.utils.optimizers.rmsprop_tf import RMSpropTF

logger = get_logger(__name__)

OPTIMIZERS_DEFAULT_PARAMS = {optim.SGD: DEFAULT_OPTIMIZER_PARAMS_SGD,
                             optim.Adam: DEFAULT_OPTIMIZER_PARAMS_ADAM,
                             optim.RMSprop: DEFAULT_OPTIMIZER_PARAMS_RMSPROP,
                             RMSpropTF: DEFAULT_OPTIMIZER_PARAMS_RMSPROPTF}
```
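This mapping is what lets `build_optimizer` (below) fall back to sensible defaults when no `optimizer_params` are supplied. A minimal sketch of the lookup pattern, with hypothetical default values standing in for the real dicts defined in `super_gradients.training.params`:

```python
import torch.optim as optim

# Hypothetical stand-ins for the imported defaults; the real values live in
# super_gradients.training.params and may differ.
DEFAULT_OPTIMIZER_PARAMS_SGD = {"momentum": 0.9, "weight_decay": 1e-4}
OPTIMIZERS_DEFAULT_PARAMS = {optim.SGD: DEFAULT_OPTIMIZER_PARAMS_SGD}

optimizer_cls = optim.SGD
# The same lookup-with-fallback used in build_optimizer below: optimizer
# classes without registered defaults simply get an empty dict.
default_params = OPTIMIZERS_DEFAULT_PARAMS[optimizer_cls] if optimizer_cls in OPTIMIZERS_DEFAULT_PARAMS else {}
print(default_params)  # {'momentum': 0.9, 'weight_decay': 0.0001}
```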
```python
def separate_zero_wd_params_groups_for_optimizer(module: nn.Module, net_named_params, weight_decay: float):
    """
    Separate the parameter groups into batchnorm/bias parameters and all the rest, and return a list of
    param groups in the format required by torch Optimizer classes:
    bias + BN parameters with weight_decay=0, the rest with the given weight decay.

    :param module: train net module.
    :param net_named_params: list of param groups, the output of SgModule.initialize_param_groups
    :param weight_decay: value to set for the non-BN and non-bias parameters
    """
    # FIXME - replace the use of id addresses to find batchnorm and bias params.
    # This solution iterates twice over the module parameters; find a way to iterate only once.
    no_decay_ids = _get_no_decay_param_ids(module)

    # Split each param group for the optimizer.
    optimizer_param_groups = []
    for param_group in net_named_params:
        no_decay_params = []
        decay_params = []
        for name, param in param_group["named_params"]:
            if id(param) in no_decay_ids:
                no_decay_params.append(param)
            else:
                decay_params.append(param)
        # Append two param groups derived from the original param group, with and without weight decay.
        extra_optim_params = {key: param_group[key] for key in param_group
                              if key not in ["named_params", "weight_decay"]}
        optimizer_param_groups.append({"params": no_decay_params, "weight_decay": 0.0, **extra_optim_params})
        optimizer_param_groups.append({"params": decay_params, "weight_decay": weight_decay, **extra_optim_params})
    return optimizer_param_groups
```
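A minimal usage sketch, assuming `super_gradients` is installed. Each incoming group is split in two, so one input group yields two optimizer groups, and extra keys such as `lr` are copied onto both:

```python
import torch.nn as nn
from super_gradients.training.utils.optimizer_utils import separate_zero_wd_params_groups_for_optimizer

# A toy module used only for parameter grouping (it is never run forward).
model = nn.Sequential(nn.Conv2d(3, 8, kernel_size=3), nn.BatchNorm2d(8), nn.Linear(8, 4))
named_groups = [{"named_params": model.named_parameters(), "lr": 0.1}]

groups = separate_zero_wd_params_groups_for_optimizer(model, named_groups, weight_decay=1e-4)
for group in groups:
    print(len(group["params"]), group["weight_decay"], group["lr"])
# -> 4 params at weight_decay 0.0 (conv bias, linear bias, both BN parameters)
# -> 2 params at weight_decay 1e-4 (conv and linear weights)
```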
```python
def _get_no_decay_param_ids(module: nn.Module):
    # FIXME - replace the use of id addresses to find batchnorm and bias params.
    # Find another common way to identify torch parameters, other than id or layer names.
    """
    Iterate over module.named_modules() and return the param id addresses of batch-norm and bias params.

    NOTE - ANY MODULE WITH AN ATTRIBUTE NAMED `bias` THAT IS AN INSTANCE OF nn.Parameter WILL BE
    CONSIDERED A BIAS PARAM FOR ZERO WEIGHT DECAY.
    """
    batchnorm_types = (_BatchNorm,)
    torch_weight_with_bias_types = (_ConvNd, nn.Linear)
    no_decay_ids = []
    for name, m in module.named_modules():
        if isinstance(m, batchnorm_types):
            no_decay_ids.append(id(m.weight))
            no_decay_ids.append(id(m.bias))
        elif hasattr(m, "bias") and isinstance(m.bias, nn.Parameter):
            if not isinstance(m, torch_weight_with_bias_types):
                logger.warning(f"Module class: {m.__class__} has a `bias` parameter attribute but is not an instance"
                               f" of the torch primitive modules; this bias parameter will be part of a param group"
                               f" with zero weight decay.")
            no_decay_ids.append(id(m.bias))
    return no_decay_ids
```
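To see the docstring's note in action, a short sketch with a hypothetical custom module that exposes a `bias` parameter without being a conv or linear primitive; its bias is still excluded from weight decay, and the warning above is logged. Note that `_get_no_decay_param_ids` is a private helper, imported here only for illustration:

```python
import torch
import torch.nn as nn
from super_gradients.training.utils.optimizer_utils import _get_no_decay_param_ids

class CustomScale(nn.Module):
    """Hypothetical module: not a conv/linear primitive, but it has a `bias` Parameter."""
    def __init__(self, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(dim))
        self.bias = nn.Parameter(torch.zeros(dim))

model = nn.Sequential(nn.Conv2d(3, 8, kernel_size=3), CustomScale(8))
no_decay = _get_no_decay_param_ids(model)

# The conv bias and CustomScale.bias are excluded from decay (the latter with a
# warning); CustomScale.weight is not, since only batch-norm weights qualify.
assert id(model[1].bias) in no_decay
assert id(model[1].weight) not in no_decay
```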
```python
def build_optimizer(net, lr, training_params):
    """
    Wrapper function for initializing the optimizer.

    :param net: the nn_module to build the optimizer for
    :param lr: initial learning rate
    :param training_params: training_parameters
    """
    if isinstance(training_params.optimizer, str):
        optimizer_cls = OptimizersTypeFactory().get(training_params.optimizer)
    else:
        optimizer_cls = training_params.optimizer
    default_optimizer_params = OPTIMIZERS_DEFAULT_PARAMS[optimizer_cls] if optimizer_cls in OPTIMIZERS_DEFAULT_PARAMS else {}
    training_params.optimizer_params = get_param(training_params, 'optimizer_params', default_optimizer_params)

    # OPTIMIZER PARAM GROUPS ARE SET USING THE DEFAULT OR A MODEL-SPECIFIC INIT
    if hasattr(net.module, 'initialize_param_groups'):
        # INITIALIZE_PARAM_GROUPS MUST RETURN A LIST OF DICTS WITH 'named_params' AND OPTIMIZER ATTRIBUTES PER GROUP
        net_named_params = net.module.initialize_param_groups(lr, training_params)
    else:
        net_named_params = [{'named_params': net.named_parameters()}]

    if training_params.zero_weight_decay_on_bias_and_bn:
        optimizer_training_params = separate_zero_wd_params_groups_for_optimizer(
            net.module, net_named_params, training_params.optimizer_params['weight_decay']
        )
    else:
        # Overwrite the groups to hold 'params' instead of 'named_params'.
        for ind_group, param_group in enumerate(net_named_params):
            param_group['params'] = [param[1] for param in list(param_group['named_params'])]
            del param_group['named_params']
            net_named_params[ind_group] = param_group
        optimizer_training_params = net_named_params

    # CREATE AN OPTIMIZER OBJECT AND INITIALIZE IT
    optimizer = optimizer_cls(optimizer_training_params, lr=lr, **training_params.optimizer_params)
    return optimizer
```
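An end-to-end sketch of `build_optimizer`. It reads `net.module`, so it expects the network to be wrapped (for example in `nn.DataParallel`); a `SimpleNamespace` stands in for SuperGradients' training-params object here, on the assumption that `get_param` falls back to plain attribute access:

```python
from types import SimpleNamespace
import torch.nn as nn
import torch.optim as optim
from super_gradients.training.utils.optimizer_utils import build_optimizer

# build_optimizer accesses net.module, so wrap the toy model accordingly.
net = nn.DataParallel(nn.Sequential(nn.Conv2d(3, 8, kernel_size=3), nn.BatchNorm2d(8)))

training_params = SimpleNamespace(
    optimizer=optim.SGD,  # passing a class skips the OptimizersTypeFactory lookup
    optimizer_params={"momentum": 0.9, "weight_decay": 1e-4},
    zero_weight_decay_on_bias_and_bn=True,
)

optimizer = build_optimizer(net, lr=0.1, training_params=training_params)
# Two param groups: conv bias plus both BN parameters at weight_decay=0.0,
# and the conv weight at weight_decay=1e-4 (per-group values override the kwarg).
print(type(optimizer).__name__, len(optimizer.param_groups))  # SGD 2
```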