#378 Feature/sg 281 add kd notebook

Merged
Ghost merged 1 commits into Deci-AI:master from deci-ai:feature/SG-281-add_kd_notebook
  
    
        
          
1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

          
            import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Union, List, Tuple
from super_gradients.training.utils.module_utils import ConvBNReLU, make_upsample_module
from super_gradients.common import UpsampleMode
from super_gradients.training.models.segmentation_models.stdc import SegmentationHead, AbstractSTDCBackbone,\
    STDC1Backbone, STDC2Backbone
from super_gradients.training.models.segmentation_models.segmentation_module import SegmentationModule
from super_gradients.training.utils import HpmStruct, get_param
class SPPM(nn.Module):
    """
    Simple Pyramid Pooling context Module.

    Every branch adaptively average-pools the input to one of `pool_sizes` and applies a ConvBNReLU with
    kernel_size=1. The branch outputs are resized back to the input's spatial size, summed, and passed through a
    final ConvBNReLU with kernel_size=3.
    """

    def __init__(self,
                 in_channels: int,
                 inter_channels: int,
                 out_channels: int,
                 pool_sizes: List[Union[int, Tuple[int, int]]],
                 upsample_mode: Union[UpsampleMode, str] = UpsampleMode.BILINEAR,
                 align_corners: bool = False):
        """
        :param in_channels: num channels of the input feature map.
        :param inter_channels: num channels in each pooling branch.
        :param out_channels: The number of output channels after pyramid pooling module.
        :param pool_sizes: spatial output sizes of the pooled feature maps.
        :param upsample_mode: interpolation mode used to resize each pooled branch back to the input size.
        :param align_corners: forwarded to `F.interpolate` when resizing the pooled branches.
        """
        super().__init__()
        self.branches = nn.ModuleList([
            nn.Sequential(
                nn.AdaptiveAvgPool2d(pool_size),
                ConvBNReLU(in_channels, inter_channels, kernel_size=1, bias=False),
            ) for pool_size in pool_sizes
        ])
        self.conv_out = ConvBNReLU(inter_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.out_channels = out_channels
        self.upsample_mode = upsample_mode
        self.align_corners = align_corners
        self.pool_sizes = pool_sizes

    def forward(self, x):
        """
        :param x: input feature map; its trailing dimensions (`x.shape[2:]`) define the size every branch is
            resized to.
        :return: output of `conv_out` applied to the sum of all resized branches.
        """
        out = None
        input_shape = x.shape[2:]
        # `F.interpolate` takes the mode as a plain string. Unwrap enum members (such as the default
        # `UpsampleMode.BILINEAR`); plain strings have no `.value` attribute and pass through unchanged.
        mode = getattr(self.upsample_mode, "value", self.upsample_mode)
        for branch in self.branches:
            y = branch(x)
            y = F.interpolate(y, size=input_shape, mode=mode, align_corners=self.align_corners)
            out = y if out is None else out + y
        out = self.conv_out(out)
        return out

    def prep_model_for_conversion(self, input_size: Union[tuple, list], stride_ratio: int = 32, **kwargs):
        """
        Replace Global average pooling with fixed kernels Average pooling, since dynamic kernel sizes are not supported
        when compiling to ONNX: `Unsupported: ONNX export of operator adaptive_avg_pool2d, input size not accessible.`

        :param input_size: model input size; only the last two entries are used.
        :param stride_ratio: factor by which `input_size` is divided to get the size of the feature map that
            reaches this module.
        """
        input_size = [x / stride_ratio for x in input_size[-2:]]
        for branch in self.branches:
            global_pool = branch[0]
            # A branch that was already converted holds an `nn.AvgPool2d`, which has no `output_size`;
            # skip it so that calling this method more than once does not raise.
            if not isinstance(global_pool, nn.AdaptiveAvgPool2d):
                continue
            out_size = global_pool.output_size
            out_size = out_size if isinstance(out_size, (tuple, list)) else (out_size, out_size)
            kernel_size = [int(i / o) for i, o in zip(input_size, out_size)]
            branch[0] = nn.AvgPool2d(kernel_size=kernel_size, stride=kernel_size)
class UAFM(nn.Module):
    """
    Unified Attention Fusion Module.

    A single-channel attention map is computed from the mean and max (both taken over dim=1) of the upsampled
    input and of the projected skip connection. After a sigmoid, the map is used to blend the two feature maps.
    """

    def __init__(self,
                 in_channels: int,
                 skip_channels: int,
                 out_channels: int,
                 up_factor: int,
                 upsample_mode: Union[UpsampleMode, str] = UpsampleMode.BILINEAR,
                 align_corners: bool = False):
        """
        :param in_channels: num_channels of input feature map.
        :param skip_channels: num_channels of skip connection feature map.
        :param out_channels: num out channels after features fusion.
        :param up_factor: upsample scale factor of the input feature map.
        :param upsample_mode: see UpsampleMode for valid options.
        :param align_corners: forwarded to `make_upsample_module`.
        """
        super().__init__()
        # 4 input channels: (mean, max) of the input stream followed by (mean, max) of the skip stream.
        self.conv_atten = nn.Sequential(
            ConvBNReLU(4, 2, kernel_size=3, padding=1, bias=False),
            ConvBNReLU(2, 1, kernel_size=3, padding=1, bias=False, use_activation=False)
        )

        # The skip connection is projected only when its channel count differs from the input's.
        if skip_channels == in_channels:
            self.proj_skip = nn.Identity()
        else:
            self.proj_skip = ConvBNReLU(skip_channels, in_channels, kernel_size=3, padding=1, bias=False)

        # No upsampling module is needed for a scale factor of 1.
        if up_factor == 1:
            self.up_x = nn.Identity()
        else:
            self.up_x = make_upsample_module(scale_factor=up_factor, upsample_mode=upsample_mode,
                                             align_corners=align_corners)

        self.conv_out = ConvBNReLU(in_channels, out_channels, kernel_size=3, padding=1, bias=False)

    def forward(self, x, skip):
        """
        :param x: input feature map to upsample before fusion.
        :param skip: skip connection feature map.
        :return: output of `conv_out` applied to the attention-weighted blend of both inputs.
        """
        upsampled = self.up_x(x)
        projected = self.proj_skip(skip)

        stats = self._avg_max_spatial_reduce(upsampled, use_concat=False) + \
            self._avg_max_spatial_reduce(projected, use_concat=False)
        weights = torch.sigmoid(self.conv_atten(torch.cat(stats, dim=1)))

        fused = upsampled * weights + projected * (1 - weights)
        return self.conv_out(fused)

    @staticmethod
    def _avg_max_spatial_reduce(x, use_concat: bool = False):
        """
        Reduce `x` over dim=1 with mean and with max, keeping the reduced dimension.

        :param use_concat: when True return both maps concatenated along dim=1, otherwise return them as a
            `[mean, max]` list.
        """
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        if use_concat:
            return torch.cat([mean_map, max_map], dim=1)
        return [mean_map, max_map]
class PPLiteSegEncoder(nn.Module):
    """
    Encoder for PPLiteSeg, include backbone followed by a context module.

    Every backbone feature map is projected with its own ConvBNReLU, and the context module is applied to the
    last backbone feature map.
    """

    def __init__(self,
                 backbone: AbstractSTDCBackbone,
                 projection_channels_list: List[int],
                 context_module: nn.Module):
        """
        :param backbone: feature extractor; must provide `get_backbone_output_number_of_channels()`.
        :param projection_channels_list: output channels of the projection conv for each backbone feature map.
        :param context_module: module applied to the last backbone feature map.
        """
        super().__init__()
        self.backbone = backbone
        self.context_module = context_module
        feats_channels = backbone.get_backbone_output_number_of_channels()
        self.proj_convs = nn.ModuleList([
            ConvBNReLU(feat_ch, proj_ch, kernel_size=3, padding=1, bias=False)
            for feat_ch, proj_ch in zip(feats_channels, projection_channels_list)
        ])
        self.projection_channels_list = projection_channels_list

    def get_output_number_of_channels(self) -> List[int]:
        """
        :return: a new list with the projection channels, followed by the context module's `out_channels`
            when the context module exposes that attribute.
        """
        # Work on a copy: appending to `self.projection_channels_list` itself would grow the stored list (and
        # the caller's list it aliases) on every call.
        channels_list = list(self.projection_channels_list)
        if hasattr(self.context_module, "out_channels"):
            channels_list.append(self.context_module.out_channels)
        return channels_list

    def forward(self, x):
        """
        :return: list of the projected backbone feature maps followed by the context module output.
        """
        feats = self.backbone(x)
        y = self.context_module(feats[-1])
        feats = [conv(f) for conv, f in zip(self.proj_convs, feats)]
        return feats + [y]
class PPLiteSegDecoder(nn.Module):
    """
    PPLiteSegDecoder using UAFM blocks to fuse feature maps.

    Stages are built starting from the last entry of `encoder_channels`; each stage fuses the running feature
    map with the next entry, walking the list backwards.
    """

    def __init__(self,
                 encoder_channels: List[int],
                 up_factors: List[int],
                 out_channels: List[int],
                 upsample_mode,
                 align_corners: bool):
        """
        :param encoder_channels: num channels of every encoder output, in encoder output order.
        :param up_factors: upsample factor per decoder stage.
        :param out_channels: num output channels per decoder stage.
        :param upsample_mode: upsample mode passed to every UAFM stage.
        :param align_corners: passed to every UAFM stage.
        """
        super().__init__()
        # Make a copy of channels list, to prevent out of scope changes.
        encoder_channels = encoder_channels.copy()
        encoder_channels.reverse()
        in_channels = encoder_channels.pop(0)
        # TODO - assert argument length
        # NOTE: `zip` stops at the shortest of the three lists, so mismatched lengths are silently truncated.
        self.up_stages = nn.ModuleList()
        for skip_ch, up_factor, out_ch in zip(encoder_channels, up_factors, out_channels):
            self.up_stages.append(UAFM(
                in_channels=in_channels,
                skip_channels=skip_ch,
                out_channels=out_ch,
                up_factor=up_factor,
                upsample_mode=upsample_mode,
                align_corners=align_corners
            ))
            # The output of this stage is the input of the next one.
            in_channels = out_ch

    def forward(self, feats: List[torch.Tensor]):
        """
        :param feats: encoder outputs, in encoder output order. The list is left untouched.
        :return: output of the last decoder stage.
        """
        # Reverse into a new sequence instead of `reverse()` / `pop()` on the argument, so the caller's list
        # keeps its order and length (consistent with the defensive copy made in `__init__`).
        feats = feats[::-1]
        x, skips = feats[0], feats[1:]
        for up_stage, skip in zip(self.up_stages, skips):
            x = up_stage(x, skip)
        return x
class PPLiteSegBase(SegmentationModule):
    """
    The PP_LiteSeg implementation based on PaddlePaddle.
    The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu,
    Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang,Baohua Lai,
    Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic
    Segmentation Model. https://arxiv.org/abs/2204.02681".
    """

    def __init__(self,
                 num_classes,
                 backbone: AbstractSTDCBackbone,
                 projection_channels_list: List[int],
                 sppm_inter_channels: int,
                 sppm_out_channels: int,
                 sppm_pool_sizes: List[int],
                 sppm_upsample_mode: Union[UpsampleMode, str],
                 align_corners: bool,
                 decoder_up_factors: List[int],
                 decoder_channels: List[int],
                 decoder_upsample_mode: Union[UpsampleMode, str],
                 head_scale_factor: int,
                 head_upsample_mode: Union[UpsampleMode, str],
                 head_mid_channels: int,
                 dropout: float,
                 use_aux_heads: bool,
                 aux_hidden_channels: List[int],
                 aux_scale_factors: List[int]
                 ):
        """
        :param num_classes: number of classes predicted by the segmentation heads.
        :param backbone: Backbone nn.Module should implement the abstract class `AbstractSTDCBackbone`.
        :param projection_channels_list: channels list to project encoder features before fusing with the decoder
            stream.
        :param sppm_inter_channels: num channels in each sppm pooling branch.
        :param sppm_out_channels: The number of output channels after sppm module.
        :param sppm_pool_sizes: spatial output sizes of the pooled feature maps.
        :param sppm_upsample_mode: Upsample mode to original size after pooling.
        :param align_corners: shared `align_corners` flag passed to the sppm, the decoder and all heads.
        :param decoder_up_factors: list upsample factor per decoder stage.
        :param decoder_channels: list of num_channels per decoder stage.
        :param decoder_upsample_mode: upsample mode in decoder stages, see UpsampleMode for valid options.
        :param head_scale_factor: scale factor for the final segmentation head logits.
        :param head_upsample_mode: upsample mode to final prediction sizes, see UpsampleMode for valid options.
        :param head_mid_channels: num of hidden channels in segmentation head.
        :param dropout: dropout value passed to every segmentation head.
        :param use_aux_heads: set True when training, output extra Auxiliary feature maps from the encoder module.
        :param aux_hidden_channels: List of hidden channels in auxiliary segmentation heads.
        :param aux_scale_factors: list of upsample factors for final auxiliary heads logits.
        """
        super().__init__(use_aux_heads=use_aux_heads)

        # Init Encoder
        backbone_out_channels = backbone.get_backbone_output_number_of_channels()
        assert len(backbone_out_channels) == len(projection_channels_list), \
            f"The length of backbone outputs ({len(backbone_out_channels)}) should match the length of projection " \
            f"channels ({len(projection_channels_list)})."
        context = SPPM(in_channels=backbone_out_channels[-1],
                       inter_channels=sppm_inter_channels,
                       out_channels=sppm_out_channels,
                       pool_sizes=sppm_pool_sizes,
                       upsample_mode=sppm_upsample_mode,
                       align_corners=align_corners)
        # The encoder gets its own copy of the list, so nothing it does with it can leak into the caller's list
        # or into the auxiliary heads built from `projection_channels_list` below.
        self.encoder = PPLiteSegEncoder(backbone=backbone,
                                        context_module=context,
                                        projection_channels_list=list(projection_channels_list))
        encoder_channels = self.encoder.get_output_number_of_channels()

        # Init Decoder
        self.decoder = PPLiteSegDecoder(encoder_channels=encoder_channels,
                                        up_factors=decoder_up_factors,
                                        out_channels=decoder_channels,
                                        upsample_mode=decoder_upsample_mode,
                                        align_corners=align_corners)

        # Init Segmentation classification heads
        self.seg_head = nn.Sequential(
            SegmentationHead(in_channels=decoder_channels[-1],
                             mid_channels=head_mid_channels,
                             num_classes=num_classes,
                             dropout=dropout),
            make_upsample_module(scale_factor=head_scale_factor, upsample_mode=head_upsample_mode,
                                 align_corners=align_corners)
        )

        # Auxiliary heads, one per projected encoder feature map (the context module output gets none).
        if self.use_aux_heads:
            encoder_out_channels = projection_channels_list
            self.aux_heads = nn.ModuleList([
                nn.Sequential(
                    SegmentationHead(backbone_ch, hidden_ch, num_classes, dropout=dropout),
                    make_upsample_module(scale_factor=scale_factor, upsample_mode=head_upsample_mode,
                                         align_corners=align_corners)
                ) for backbone_ch, hidden_ch, scale_factor in zip(encoder_out_channels, aux_hidden_channels,
                                                                  aux_scale_factors)
            ])
        self.init_params()

    def _remove_auxiliary_heads(self):
        """
        Delete the auxiliary heads, if they were created.
        """
        if hasattr(self, "aux_heads"):
            del self.aux_heads

    @property
    def backbone(self) -> nn.Module:
        """
        Support SG load backbone when training.
        """
        return self.encoder.backbone

    def forward(self, x):
        """
        :return: the main head logits when `use_aux_heads` is False, otherwise a tuple of the main head logits
            followed by the logits of every auxiliary head.
        """
        feats = self.encoder(x)
        if self.use_aux_heads:
            # Slice (a new list) before decoding, so the auxiliary inputs do not depend on what the decoder
            # does with `feats`. The last entry is the context module output, which has no auxiliary head.
            enc_feats = feats[:-1]
        x = self.decoder(feats)
        x = self.seg_head(x)
        if not self.use_aux_heads:
            return x
        aux_feats = [aux_head(feat) for feat, aux_head in zip(enc_feats, self.aux_heads)]
        return tuple([x] + aux_feats)

    def initialize_param_groups(self, lr: float, training_params: HpmStruct) -> list:
        """
        Custom param groups for training:
            - Different lr for backbone and the rest, if `multiply_head_lr` key is in `training_params`.

        :return: two groups: "no_multiply_params" (backbone parameters) trained with `lr`, and
            "multiply_lr_params" (all other parameters) trained with `lr * multiply_head_lr`.
        """
        multiply_head_lr = get_param(training_params, "multiply_head_lr", 1)
        multiply_lr_params, no_multiply_params = self._separate_lr_multiply_params()
        param_groups = [{"named_params": no_multiply_params, "lr": lr, "name": "no_multiply_params"},
                        {"named_params": multiply_lr_params, "lr": lr * multiply_head_lr, "name": "multiply_lr_params"}]
        return param_groups

    def update_param_groups(self, param_groups: list, lr: float, epoch: int, iter: int, training_params: HpmStruct,
                            total_batch: int) -> list:
        """
        Set the lr of every group to `lr`, multiplied by `multiply_head_lr` for the "multiply_lr_params" group.
        `epoch`, `iter` and `total_batch` are not used here.

        :return: the same `param_groups` list, updated in place.
        """
        multiply_head_lr = get_param(training_params, "multiply_head_lr", 1)
        for param_group in param_groups:
            param_group['lr'] = lr
            if param_group["name"] == "multiply_lr_params":
                param_group['lr'] *= multiply_head_lr
        return param_groups

    def _separate_lr_multiply_params(self):
        """
        Separate backbone params from the rest.
        :return: iterators of groups named_parameters, as `(multiply_lr_params, no_multiply_params)`.
        """
        multiply_lr_params, no_multiply_params = {}, {}
        for name, param in self.named_parameters():
            # Parameters whose name contains "encoder.backbone" keep the base lr.
            if "encoder.backbone" in name:
                no_multiply_params[name] = param
            else:
                multiply_lr_params[name] = param
        return multiply_lr_params.items(), no_multiply_params.items()

    def prep_model_for_conversion(self, input_size: Union[tuple, list], stride_ratio: int = 32, **kwargs):
        """
        Run the parent conversion preparation, then let an SPPM context module prepare itself as well.

        :param input_size: model input size, forwarded to the parent class and to the SPPM.
        :param stride_ratio: forwarded to the SPPM only.
        """
        super().prep_model_for_conversion(input_size, **kwargs)
        if isinstance(self.encoder.context_module, SPPM):
            self.encoder.context_module.prep_model_for_conversion(input_size=input_size, stride_ratio=stride_ratio)

    def replace_head(self, new_num_classes: int, **kwargs):
        """
        Call `replace_num_classes(new_num_classes)` on every `SegmentationHead` found in this model.
        """
        for module in self.modules():
            if isinstance(module, SegmentationHead):
                module.replace_num_classes(new_num_classes)
class PPLiteSegB(PPLiteSegBase):
    """
    PPLiteSeg variant built on an `STDC2Backbone`.
    """

    def __init__(self, arch_params: HpmStruct):
        """
        :param arch_params: read for `in_channels` (default 3), `num_classes`, `dropout` (default 0.) and
            `use_aux_heads` (default False); every other setting of this variant is fixed.
        """
        in_channels = get_param(arch_params, "in_channels", 3)
        stdc_backbone = STDC2Backbone(in_channels=in_channels, out_down_ratios=[8, 16, 32])

        # Settings that do not depend on `arch_params`.
        fixed_config = dict(
            projection_channels_list=[96, 128, 128],
            sppm_inter_channels=128,
            sppm_out_channels=128,
            sppm_pool_sizes=[1, 2, 4],
            sppm_upsample_mode="bilinear",
            align_corners=False,
            decoder_up_factors=[1, 2, 2],
            decoder_channels=[128, 96, 64],
            decoder_upsample_mode="bilinear",
            head_scale_factor=8,
            head_upsample_mode="bilinear",
            head_mid_channels=64,
            aux_hidden_channels=[32, 64, 64],
            aux_scale_factors=[8, 16, 32],
        )
        super().__init__(num_classes=get_param(arch_params, "num_classes"),
                         backbone=stdc_backbone,
                         dropout=get_param(arch_params, "dropout", 0.),
                         use_aux_heads=get_param(arch_params, "use_aux_heads", False),
                         **fixed_config)
class PPLiteSegT(PPLiteSegBase):
    """
    PPLiteSeg variant built on an `STDC1Backbone`.
    """

    def __init__(self, arch_params: HpmStruct):
        """
        :param arch_params: read for `in_channels` (default 3), `num_classes`, `dropout` (default 0.) and
            `use_aux_heads` (default False); every other setting of this variant is fixed.
        """
        in_channels = get_param(arch_params, "in_channels", 3)
        stdc_backbone = STDC1Backbone(in_channels=in_channels, out_down_ratios=[8, 16, 32])

        # Settings that do not depend on `arch_params`.
        fixed_config = dict(
            projection_channels_list=[64, 128, 128],
            sppm_inter_channels=128,
            sppm_out_channels=128,
            sppm_pool_sizes=[1, 2, 4],
            sppm_upsample_mode="bilinear",
            align_corners=False,
            decoder_up_factors=[1, 2, 2],
            decoder_channels=[128, 64, 32],
            decoder_upsample_mode="bilinear",
            head_scale_factor=8,
            head_upsample_mode="bilinear",
            head_mid_channels=32,
            aux_hidden_channels=[32, 64, 64],
            aux_scale_factors=[8, 16, 32],
        )
        super().__init__(num_classes=get_param(arch_params, "num_classes"),
                         backbone=stdc_backbone,
                         dropout=get_param(arch_params, "dropout", 0.),
                         use_aux_heads=get_param(arch_params, "use_aux_heads", False),
                         **fixed_config)

          
        
      

  
Tip!
Press p to see the previous file, or n to see the next file
Deci-AI / super-gradients connected to https://github.com/Deci-AI/super-gradients.git

#378 Feature/sg 281 add kd notebook

Deci-AI
/
super-gradients
connected to https://github.com/Deci-AI/super-gradients.git