from super_gradients.training.models.sg_module import SgModule
from collections import namedtuple
import torch
from super_gradients.training.utils.utils import HpmStruct
from super_gradients.training.utils import get_param

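# A lightweight named container for the two forward passes: the student's
# output and the (frozen) teacher's output, as consumed by a KD loss.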
KDOutput = namedtuple('KDOutput', 'student_output teacher_output')


class KDModule(SgModule):
    """
    KDModule

    Class implementing Knowledge Distillation logic as an SgModule.

    Attributes:
        student: SgModule - the student model
        teacher: torch.nn.Module - the teacher model
        run_teacher_on_eval: bool - whether to keep self.teacher in eval mode, regardless of self.train(mode)
        arch_params: HpmStruct - architecture hyper-parameters

    Additionally, by passing teacher_input_adapter (torch.nn.Module) through arch_params, one can modify the
    teacher net so that effectively teacher = torch.nn.Sequential(teacher_input_adapter, teacher). This is
    useful when the teacher net expects a different input format from the student (for example, a different
    normalization).
    """

    def __init__(self, arch_params: HpmStruct, student: SgModule, teacher: torch.nn.Module, run_teacher_on_eval=False):
        super(KDModule, self).__init__()
        self.arch_params = arch_params
        self.student = student
        self.teacher = teacher
        self.teacher_input_adapter = get_param(self.arch_params, "teacher_input_adapter")
        self.run_teacher_on_eval = run_teacher_on_eval
        self._freeze_teacher()

        # WHEN CREATING A MODULE SELF.TRAIN() ISN'T CALLED AND SO THE TEACHER MUST BE MOVED TO EVAL MODE EXPLICITLY
        if self.run_teacher_on_eval:
            self.teacher.eval()
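
    # The teacher (and its input adapter, if any) is a fixed target: its
    # parameters are excluded from gradient computation so that only the
    # student is updated during distillation.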
    def _freeze_teacher(self):
        for p in self.teacher.parameters():
            p.requires_grad = False

        if self.teacher_input_adapter is not None:
            for p in self.teacher_input_adapter.parameters():
                p.requires_grad = False
            self.teacher_input_adapter.eval()
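
    # train()/eval() are overridden so that mode switches always apply to the
    # student, while the teacher follows only when run_teacher_on_eval is False.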
    def train(self, mode=True):
        self.student.train(mode)
        if not self.run_teacher_on_eval:
            self.teacher.train(mode)

    def eval(self):
        self.student.eval()
        self.teacher.eval()
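
    # Both networks see the same batch; the teacher's view can first be
    # re-mapped by teacher_input_adapter (e.g. to apply its own normalization).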
    def forward(self, x):
        if self.teacher_input_adapter is not None:
            return KDOutput(student_output=self.student(x),
                            teacher_output=self.teacher(self.teacher_input_adapter(x)))
        else:
            return KDOutput(student_output=self.student(x),
                            teacher_output=self.teacher(x))
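
    # Optimization-related hooks delegate to the student: since the teacher is
    # frozen, only the student's parameters take part in the param groups.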
    def initialize_param_groups(self, lr: float, training_params: HpmStruct) -> list:
        return self.student.initialize_param_groups(lr, training_params)

    def update_param_groups(self, param_groups: list, lr: float, epoch: int, iter: int, training_params: HpmStruct,
                            total_batch: int) -> list:
        return self.student.update_param_groups(param_groups, lr, epoch, iter, training_params, total_batch)

    def replace_head(self, **kwargs):
        self.student.replace_head(**kwargs)
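

# ----------------------------------------------------------------------
# Usage sketch (illustrative only, kept commented out): building a KDModule
# and feeding its KDOutput to a distillation loss. StudentNet, TeacherNet
# and kd_loss below are hypothetical placeholders, not part of this module.
#
#   arch_params = HpmStruct(teacher_input_adapter=None)
#   kd_net = KDModule(arch_params=arch_params,
#                     student=StudentNet(),
#                     teacher=TeacherNet(),
#                     run_teacher_on_eval=True)
#   out = kd_net(torch.randn(1, 3, 224, 224))
#   loss = kd_loss(out.student_output, out.teacher_output)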