#609 Ci fix

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:bugfix/infra-000_ci

import numpy as np
import GPUtil
from typing import Union
from collections import OrderedDict
import torch
import torch.nn as nn
from super_gradients.training.utils.utils import Timer


def get_model_stats(model: nn.Module, input_dims: Union[list, tuple], high_verbosity: bool = True, batch_size: int = 1,
                    device: str = 'cuda',  # noqa: C901
                    dtypes=None, iterations: int = 100):
    """
    Return the model summary as a string.
    The block (type) column represents the layers listed above it.

    :param dtypes:         the input types (a list of input types)
    :param high_verbosity: print layer-by-layer information
    """
    dtypes = dtypes or [torch.FloatTensor] * len(input_dims)

    def register_hook(module):
        """
        Add a hook (all the desirable actions) for every layer that is not nn.Sequential/nn.ModuleList.
        """
        def hook(module, input, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(summary)

            m_key = f'{class_name}-{module_idx + 1}'
            summary[m_key] = OrderedDict()
            # block_name refers to all layers that contain other layers
            if len(module._modules) != 0:
                summary[m_key]["block_name"] = class_name
            summary[m_key]["inference_time"] = np.round(timer.stop(), 3)
            timer.start()
            summary[m_key]["gpu_occupation"] = (round(torch.cuda.memory_allocated(0) / 1024 ** 3, 2), 'GB') if torch.cuda.is_available() else [0]
            summary[m_key]["gpu_cached_memory"] = (round(torch.cuda.memory_reserved(0) / 1024 ** 3, 2), 'GB') if torch.cuda.is_available() else [0]
            summary[m_key]["input_shape"], summary[m_key]["output_shape"] = get_input_output_shapes(
                batch_size=batch_size, input_dims=input, output_dims=output)

            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += torch.prod(torch.LongTensor(list(module.weight.size())))
                summary[m_key]["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            summary[m_key]["nb_params"] = params

        if not isinstance(module, (nn.Sequential, nn.ModuleList)):
            hooks.append(module.register_forward_hook(hook))

    # multiple inputs to the network
    if isinstance(input_dims, tuple):
        input_dims = [input_dims]

    x = [torch.rand(batch_size, *input_dim).type(dtype).to(device=device)
         for input_dim, dtype in zip(input_dims, dtypes)]

    summary_list = []
    with torch.no_grad():
        for i in range(iterations + 10):
            # create properties
            summary = OrderedDict()
            hooks = []
            # register the hooks
            model.apply(register_hook)
            timer = Timer(device=device)
            timer.start()
            # make a forward pass
            model(*x)
            # remove the hooks
            for h in hooks:
                h.remove()
            # the first 10 iterations are warmup and are not counted
            if i >= 10:
                summary_list.append(summary)

    summary = _average_inference_time(summary_list=summary_list, summary=summary, divisor=iterations)

    return _convert_summary_dict_to_string(summary=summary, high_verbosity=high_verbosity, input_dims=input_dims,
                                           batch_size=batch_size, device=device)


def _average_inference_time(summary_list: list, summary: OrderedDict, divisor: int = 100):
    """
    Sum the per-layer inference times collected over all measured iterations and divide by the iteration count.
    """
    inference_time_dict = {}
    for idx, iteration_summary in enumerate(summary_list):
        # named iteration_summary to avoid shadowing the built-in sum()
        for key in iteration_summary:
            if idx == 0:
                inference_time_dict[key] = iteration_summary[key]['inference_time']
            else:
                inference_time_dict[key] += iteration_summary[key]['inference_time']
    for key in summary:
        summary[key]['inference_time'] = np.round(inference_time_dict[key] / divisor, 3)
    return summary


def get_input_output_shapes(batch_size: int, input_dims: Union[list, tuple], output_dims: Union[list, tuple]):
    """
    Returns input/output shapes for single/multiple input/s output/s.
    """
    if isinstance(input_dims[0], list):
        input_shape = [i.size() for i in input_dims[0] if i is not None]
    else:
        input_shape = list(input_dims[0].size())
        input_shape[0] = batch_size
    if isinstance(output_dims, (list, tuple)):
        output_shape = [[-1] + list(o.size())[1:] for o in output_dims if o is not None]
    else:
        output_shape = list(output_dims.size())
        output_shape[0] = batch_size
    return input_shape, output_shape


def _convert_summary_dict_to_string(summary: dict, high_verbosity: bool, input_dims: Union[list, tuple],
                                    batch_size: int, device: str):
    """
    Takes the summary dict and returns the summary string.
    """
    summary_str = ''
    total_params = 0
    total_output = 0
    trainable_params = 0
    if high_verbosity:
        summary_str += f"{'-' * 200}\n"
        line_new = f'{"block (type)":>20} {"Layer (type)":>20} {"Output Shape":>63} {"Param #":>15} ' \
                   f'{"inference time[ms]":>25} {"gpu_cached_memory[GB]":>25} {"gpu_occupation[GB]":>25}'
        summary_str += f"{line_new}\n"
        summary_str += f"{'=' * 200}\n"
    for layer in summary:
        # input_shape, output_shape, trainable, nb_params
        line_new = "{:>20} {:>20} {:>63} {:>15} {:>25} {:>25} {:>25}".format(
            str(summary[layer]["block_name"]) if "block_name" in summary[layer].keys() else "",
            layer,
            str(summary[layer]["output_shape"]),
            "{0:,}".format(summary[layer]["nb_params"]),
            "{0:,}".format(summary[layer]["inference_time"]),
            "{0:,}".format(summary[layer]["gpu_cached_memory"][0]),
            "{0:,}".format(summary[layer]["gpu_occupation"][0])
        )
        total_params += summary[layer]["nb_params"]
        total_output += np.prod(summary[layer]["output_shape"])
        if "trainable" in summary[layer]:
            if summary[layer]["trainable"]:
                trainable_params += summary[layer]["nb_params"]
        if high_verbosity:
            summary_str += line_new + "\n"

    # assume 4 bytes/number (float on cuda)
    total_input_size = abs(np.prod(sum(input_dims, ())) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    gpus = GPUtil.getGPUs()
    gpu_memory_utilization = [gpu.memoryUtil * 100 for gpu in gpus]

    summary_str += f"{'=' * 200}\n" \
                   f"Total params: {total_params:,}\n" \
                   f"Trainable params: {trainable_params:,}\n" \
                   f"Non-trainable params: {total_params - trainable_params:,}\n" \
                   f"{'-' * 200}\n" \
                   f"Input size (MB): {total_input_size:.2f}\n" \
                   f"Forward/backward pass size (MB): {total_output_size:.2f}\n" \
                   f"Params size (MB): {total_params_size:.2f}\n" \
                   f"Estimated Total Size (MB): {total_size:.2f}\n"
    # iterate over the GPUs actually present instead of assuming exactly 4 devices
    summary_str += str(["Memory Footprint (percentage): %0.2f" % util for util in gpu_memory_utilization]) + "\n"
    summary_str += f"{'-' * 200}\n"
    return summary_str
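
For context, a minimal usage sketch, not part of the PR: it profiles a small toy CNN on CPU with a reduced iteration count. The toy model and input shape are illustrative assumptions, as is the premise that Timer falls back to host-side timing when device='cpu'.

# Illustrative usage sketch (assumptions: toy model, CPU-capable Timer).
import torch.nn as nn

toy_model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(16, 10),
)

# input_dims is a list with one shape tuple per model input, excluding the batch dimension.
print(get_model_stats(toy_model, input_dims=[(3, 32, 32)], device='cpu', iterations=10))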
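The per-layer bookkeeping above relies on PyTorch forward hooks, which fire after each module's forward(). A standalone illustration of that mechanism, independent of the PR:

# Minimal forward-hook sketch: record a module's class name and output shape,
# then detach the hook, mirroring what register_hook/hook do above.
import torch
import torch.nn as nn

layer = nn.Linear(4, 2)
handle = layer.register_forward_hook(
    lambda module, inputs, output: print(type(module).__name__, tuple(output.shape)))
layer(torch.rand(1, 4))  # prints: Linear (1, 2)
handle.remove()          # hooks are removed after each pass, as in get_model_stats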
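The size estimates at the bottom of the summary assume 4 bytes per element (fp32). A worked example of that arithmetic, using an illustrative parameter count of roughly ResNet-18 size:

# 11,689,512 params * 4 bytes / 1024**2 ~= 44.59 MB, matching the "Params size (MB)" line.
total_params = 11_689_512
params_size_mb = total_params * 4 / 1024 ** 2
print(f"Params size (MB): {params_size_mb:.2f}")  # -> Params size (MB): 44.59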