import numpy as np
import GPUtil
from typing import Union
from collections import OrderedDict

import torch
import torch.nn as nn

from super_gradients.training.utils.utils import Timer


def get_model_stats(model: nn.Module, input_dims: Union[list, tuple], high_verbosity: bool = True, batch_size: int = 1,
                    device: str = 'cuda',  # noqa: C901
                    dtypes=None, iterations: int = 100):
    """
    Return the model summary as a string.
    The block (type) column names the container module that the layer rows above it belong to.

    :param model:           The model to profile.
    :param input_dims:      A single input shape (tuple), or a list of shapes for multi-input models.
    :param high_verbosity:  If True, prints layer-by-layer information.
    :param batch_size:      Batch size used for the dummy forward passes.
    :param device:          'cuda' or 'cpu'.
    :param dtypes:          The input types (a list of input types, one per input).
    :param iterations:      Number of timed forward passes (after 10 warmup passes).
    """
    dtypes = dtypes or [torch.FloatTensor] * len(input_dims)

    def register_hook(module):
        """
        Add a hook (all the desirable actions) for every layer that is not an nn.Sequential/nn.ModuleList container.
        """
        def hook(module, input, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(summary)

            m_key = f'{class_name}-{module_idx + 1}'
            summary[m_key] = OrderedDict()

            # block_name is only set for modules that contain other modules
            if len(module._modules) != 0:
                summary[m_key]["block_name"] = class_name

            summary[m_key]["inference_time"] = np.round(timer.stop(), 3)
            timer.start()
            summary[m_key]["gpu_occupation"] = (round(torch.cuda.memory_allocated(0) / 1024 ** 3, 2), 'GB') if torch.cuda.is_available() else (0, 'GB')
            summary[m_key]["gpu_cached_memory"] = (round(torch.cuda.memory_reserved(0) / 1024 ** 3, 2), 'GB') if torch.cuda.is_available() else (0, 'GB')
            summary[m_key]["input_shape"], summary[m_key]["output_shape"] = get_input_output_shapes(
                batch_size=batch_size, input_dims=input, output_dims=output)

            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += torch.prod(torch.LongTensor(list(module.weight.size())))
                summary[m_key]["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            summary[m_key]["nb_params"] = params

        if not isinstance(module, (nn.Sequential, nn.ModuleList)):
            hooks.append(module.register_forward_hook(hook))

    # support multiple inputs to the network: normalize to a list of shape tuples
    if isinstance(input_dims, tuple):
        input_dims = [input_dims]

    x = [torch.rand(batch_size, *input_dim).type(dtype).to(device=device)
         for input_dim, dtype in zip(input_dims, dtypes)]

    summary_list = []
    with torch.no_grad():
        for i in range(iterations + 10):
            # create properties
            summary = OrderedDict()
            hooks = []

            # register the hooks
            model.apply(register_hook)
            timer = Timer(device=device)
            timer.start()
            # make a forward pass
            model(*x)

            # remove these hooks
            for h in hooks:
                h.remove()

            # the first 10 iterations are warmup and are not counted
            if i >= 10:
                summary_list.append(summary)

    summary = _average_inference_time(summary_list=summary_list, summary=summary, divisor=iterations)
    return _convert_summary_dict_to_string(summary=summary, high_verbosity=high_verbosity, input_dims=input_dims,
                                           batch_size=batch_size, device=device)


def _average_inference_time(summary_list: list, summary: OrderedDict, divisor: int = 100):
    """
    Average the per-layer inference times accumulated over all timed iterations.
    """
    inference_time_dict = {}
    for idx, iteration_summary in enumerate(summary_list):
        for key in iteration_summary.keys():
            if idx == 0:
                inference_time_dict[key] = iteration_summary[key]['inference_time']
            else:
                inference_time_dict[key] += iteration_summary[key]['inference_time']

    for key in summary.keys():
        summary[key]['inference_time'] = np.round(inference_time_dict[key] / divisor, 3)
    return summary
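
# Worked example for the averaging above (hypothetical numbers): with
# divisor=2 and two collected summaries whose 'Conv2d-1' entries report
# inference times of 0.004 and 0.006, the returned summary holds
# round((0.004 + 0.006) / 2, 3) == 0.005 for that layer.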


def get_input_output_shapes(batch_size: int, input_dims: Union[list, tuple], output_dims: Union[list, tuple]):
    """
    Returns the input/output shapes for single or multiple input(s)/output(s).
    """
    if isinstance(input_dims[0], list):
        input_shape = [i.size() for i in input_dims[0] if i is not None]
    else:
        input_shape = list(input_dims[0].size())
        input_shape[0] = batch_size
    if isinstance(output_dims, (list, tuple)):
        output_shape = [
            [-1] + list(o.size())[1:] for o in output_dims if o is not None
        ]
    else:
        output_shape = list(output_dims.size())
        output_shape[0] = batch_size
    return input_shape, output_shape
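
# Worked example for the shape handling above (hypothetical shapes): a module
# receiving a single (8, 3, 32, 32) tensor and returning a single (8, 10)
# tensor yields, with batch_size=8, input_shape == [8, 3, 32, 32] and
# output_shape == [8, 10]; a module returning a tuple of tensors is instead
# reported as a list of [-1, ...] shapes, one per output.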


def _convert_summary_dict_to_string(summary: dict, high_verbosity: bool, input_dims: Union[list, tuple],
                                    batch_size: int, device: str):
    """
    Takes a summary dict and returns a summary string.
    """
    summary_str = ''
    total_params = 0
    total_output = 0
    trainable_params = 0
    if high_verbosity:
        summary_str += f"{'-' * 200}\n"
        line_new = f'{"block (type)":>20} {"Layer (type)":>20} {"Output Shape":>63} {"Param #":>15} ' \
                   f'{"inference time[ms]":>25} {"gpu_cached_memory[GB]":>25} {"gpu_occupation[GB]":>25}'
        summary_str += f"{line_new}\n"
        summary_str += f"{'=' * 200}\n"
    for layer in summary:
        # input_shape, output_shape, trainable, nb_params
        line_new = "{:>20} {:>20} {:>63} {:>15} {:>25} {:>25} {:>25}".format(
            str(summary[layer]["block_name"]) if "block_name" in summary[layer].keys() else "",
            layer,
            str(summary[layer]["output_shape"]),
            "{0:,}".format(summary[layer]["nb_params"]),
            "{0:,}".format(summary[layer]["inference_time"]),
            "{0:,}".format(summary[layer]["gpu_cached_memory"][0]),
            "{0:,}".format(summary[layer]["gpu_occupation"][0])
        )
        total_params += summary[layer]["nb_params"]
        total_output += np.prod(summary[layer]["output_shape"])
        if "trainable" in summary[layer]:
            if summary[layer]["trainable"]:
                trainable_params += summary[layer]["nb_params"]
        if high_verbosity:
            summary_str += line_new + "\n"

    # assume 4 bytes/number (float on cuda)
    total_input_size = abs(np.prod(sum(input_dims, ())) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    gpus = GPUtil.getGPUs()
    gpu_memory_utilization = [gpu.memoryUtil * 100 for gpu in gpus]
    summary_str += f"{'=' * 200}\n" \
                   f"Total params: {total_params:,}\n" \
                   f"Trainable params: {trainable_params:,}\n" \
                   f"Non-trainable params: {total_params - trainable_params:,}\n" \
                   f"{'-' * 200}\n" \
                   f"Input size (MB): {total_input_size:.2f}\n" \
                   f"Forward/backward pass size (MB): {total_output_size:.2f}\n" \
                   f"Params size (MB): {total_params_size:.2f}\n" \
                   f"Estimated Total Size (MB): {total_size:.2f}\n"
    summary_str += str(["Memory Footprint (percentage): %0.2f" % utilization for utilization in gpu_memory_utilization]) + "\n"
    summary_str += f"{'-' * 200}\n"
    return summary_str
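

# Minimal usage sketch, assuming super_gradients is installed; demo_model is an
# arbitrary stand-in, and any nn.Module whose forward accepts tensors matching
# input_dims would work. Run on 'cuda' to make the GPU memory columns meaningful.
if __name__ == "__main__":
    demo_model = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d(1),
        nn.Flatten(),
        nn.Linear(16, 10),
    )
    # A single input of shape (3, 224, 224); pass a list of shape tuples for
    # multi-input models.
    print(get_model_stats(demo_model, input_dims=(3, 224, 224), device='cpu', iterations=10))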