Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

#869 Add DagsHub Logger to Super Gradients

Merged
Ghost merged 1 commit into Deci-AI:master from timho102003:dagshub_logger
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
  1. import os
  2. import json
  3. import sys
  4. import shutil
  5. from zipfile import ZipFile
  6. from typing import List, Optional, Any
  7. import importlib.util
  8. from omegaconf import DictConfig
  9. from torch import nn
  10. import super_gradients
  11. from super_gradients.common.environment.env_variables import env_variables
  12. from super_gradients.common.abstractions.abstract_logger import get_logger
  13. from super_gradients.common.environment.cfg_utils import load_arch_params, load_recipe
  14. logger = get_logger(__name__)
  15. client_enabled = True
  16. try:
  17. from deci_lab_client.client import DeciPlatformClient
  18. from deci_lab_client.types import S3SignedUrl
  19. from deci_lab_client.models import ModelBenchmarkState
  20. from deci_common.data_interfaces.files_data_interface import FilesDataInterface
  21. from deci_lab_client.models import AutoNACFileName
  22. from deci_lab_client import ApiException, BodyRegisterUserArchitecture
  23. except (ImportError, NameError):
  24. client_enabled = False
  25. class DeciClient:
  26. """
  27. A client to deci platform and model zoo.
  28. requires credentials for connection
  29. """
  30. def __init__(self):
  31. if not client_enabled:
  32. logger.error(
  33. "deci-lab-client or deci-common are not installed. Model cannot be loaded from deci lab."
  34. "Please install deci-lab-client>=2.55.0 and deci-common>=3.4.1"
  35. )
  36. return
  37. self.api_host = env_variables.DECI_API_HOST
  38. self.lab_client = DeciPlatformClient(api_host=self.api_host)
  39. self.lab_client.login(token=env_variables.DECI_PLATFORM_TOKEN)
  40. def _get_file(self, model_name: str, file_name: str) -> Optional[str]:
  41. """Get a file from the DeciPlatform if it exists, otherwise returns None
  42. :param model_name: Name of the model to download from, as saved in the platform.
  43. :param file_name: Name of the file to download
  44. :return: Path were the downloaded file was saved to. None if not found.
  45. """
  46. try:
  47. response = self.lab_client.get_autonac_model_file_link(
  48. model_name=model_name, file_name=file_name, super_gradients_version=super_gradients.__version__
  49. )
  50. download_link = response.data
  51. except ApiException as e:
  52. if e.status == 401:
  53. logger.error(
  54. "Unauthorized. wrong token or token was not defined. please login to deci-lab-client " "by calling DeciPlatformClient().login(<token>)"
  55. )
  56. elif e.status == 400 and e.body is not None and "message" in e.body:
  57. logger.error(f"Deci client: {json.loads(e.body)['message']}")
  58. else:
  59. logger.debug(e.body)
  60. return None
  61. file_path = FilesDataInterface.download_temporary_file(file_url=download_link)
  62. return file_path
  63. def get_model_arch_params(self, model_name: str) -> Optional[DictConfig]:
  64. """Get the model arch_params from DeciPlatform.
  65. :param model_name: Name of the model as saved in the platform.
  66. :return: arch_params. None if arch_params were not found for this specific model on this SG version."""
  67. arch_params_file = self._get_file(model_name, AutoNACFileName.STRUCTURE_YAML)
  68. if arch_params_file is None:
  69. return None
  70. config_name = os.path.basename(arch_params_file)
  71. download_dir = os.path.dirname(arch_params_file)
  72. # The arch_params config files need to be saved inside an "arch_params" folder
  73. _move_file_to_folder(src_file_path=arch_params_file, dest_dir_name="arch_params")
  74. return load_arch_params(config_name=config_name, recipes_dir_path=download_dir)
  75. def get_model_recipe(self, model_name: str) -> Optional[DictConfig]:
  76. """Get the model recipe from DeciPlatform.
  77. :param model_name: Name of the model as saved in the platform.
  78. :return: recipe. None if recipe were not found for this specific model on this SG version."""
  79. recipe_file = self._get_file(model_name, AutoNACFileName.RECIPE_YAML)
  80. if recipe_file is None:
  81. return None
  82. config_name = os.path.basename(recipe_file)
  83. download_dir = os.path.dirname(recipe_file)
  84. return load_recipe(config_name=config_name, recipes_dir_path=download_dir)
  85. def get_model_weights(self, model_name: str) -> Optional[str]:
  86. """Get the path to model weights (downloaded locally).
  87. :param model_name: Name of the model as saved in the platform.
  88. :return: model_weights path. None if weights were not found for this specific model on this SG version."""
  89. return self._get_file(model_name=model_name, file_name=AutoNACFileName.WEIGHTS_PTH)
  90. def download_and_load_model_additional_code(self, model_name: str, target_path: str, package_name: str = "deci_model_code") -> None:
  91. """
  92. try to download code files for this model.
  93. if found, code files will be placed in the target_path/package_name and imported dynamically
  94. """
  95. file = self._get_file(model_name=model_name, file_name=AutoNACFileName.CODE_ZIP)
  96. package_path = os.path.join(target_path, package_name)
  97. if file is not None:
  98. # crete the directory
  99. os.makedirs(package_path, exist_ok=True)
  100. # extract code files
  101. with ZipFile(file) as zipfile:
  102. zipfile.extractall(package_path)
  103. # add an init file that imports all code files
  104. with open(os.path.join(package_path, "__init__.py"), "w") as init_file:
  105. all_str = "\n\n__all__ = ["
  106. for code_file in os.listdir(path=package_path):
  107. if code_file.endswith(".py") and not code_file.startswith("__init__"):
  108. init_file.write(f'import {code_file.replace(".py", "")}\n')
  109. all_str += f'"{code_file.replace(".py", "")}", '
  110. all_str += "]\n\n"
  111. init_file.write(all_str)
  112. # include in path and import
  113. sys.path.insert(1, package_path)
  114. importlib.import_module(package_name)
  115. logger.info(
  116. f"*** IMPORTANT ***: files required for the model {model_name} were downloaded and added to your code in:\n{package_path}\n"
  117. f"These files will be downloaded to the same location each time the model is fetched from the deci-client.\n"
  118. f"you can override this by passing models.get(... download_required_code=False) and importing the files yourself"
  119. )
  120. def upload_model(self, model: nn.Module, model_meta_data, optimization_request_form):
  121. """
  122. This function will upload the trained model to the Deci Lab
  123. :param model: The resulting model from the training process
  124. :param model_meta_data: Metadata to accompany the model
  125. :param optimization_request_form: The optimization parameters
  126. """
  127. self.lab_client.add_model(
  128. add_model_request=model_meta_data,
  129. optimization_request=optimization_request_form,
  130. local_loaded_model=model,
  131. )
  132. def is_model_benchmarking(self, name: str) -> bool:
  133. """Check if a given model is still benchmarking or not.
  134. :param name: The mode name.
  135. """
  136. benchmark_state = self.lab_client.get_model_by_name(name=name).data.benchmark_state
  137. return benchmark_state in [ModelBenchmarkState.IN_PROGRESS, ModelBenchmarkState.PENDING]
  138. def register_experiment(self, name: str, model_name: str, resume: bool):
  139. """Registers a training experiment in Deci's backend.
  140. :param name: Name of the experiment to register
  141. :param model_name: Name of the model architecture to connect the experiment to
  142. """
  143. try:
  144. self.lab_client.register_user_architecture(BodyRegisterUserArchitecture(architecture_name=model_name))
  145. except ApiException as e:
  146. if e.status == 422:
  147. logger.debug(f"The model was already registered, or validation error: {e.body}")
  148. else:
  149. raise e
  150. self.lab_client.register_experiment(name=name, model_name=model_name, resume=resume)
  151. def save_experiment_file(self, file_path: str):
  152. """
  153. Uploads a training related file to Deci's location in S3. This can be a TensorBoard file or a log
  154. :params file_path: The local path of the file to be uploaded
  155. """
  156. self.lab_client.save_experiment_file(file_path=file_path)
  157. def upload_file_to_s3(self, tag: str, level: str, from_path: str):
  158. """Upload a file to the platform S3 bucket.
  159. :param tag: Tag that will be associated to the file.
  160. :param level: Logging level that will be used to notify the monitoring system that the file was uploaded.
  161. :param from_path: Path of the file to upload.
  162. """
  163. data = self.lab_client.upload_log_url(tag=tag, level=level)
  164. signed_url = S3SignedUrl(**data.data)
  165. self.lab_client.upload_file_to_s3(from_path=from_path, s3_signed_url=signed_url)
  166. def add_model(
  167. self,
  168. model_metadata,
  169. hardware_types: List[str],
  170. model_path: Optional[str] = None,
  171. model: Optional[nn.Module] = None,
  172. **kwargs: Any,
  173. ):
  174. """Adds a new model to the company's model repository.
  175. :param model_metadata: The model metadata.
  176. :param hardware_types: The hardware types you want to benchmark the model on.
  177. :param model_path: The path of the model on the local operating system.
  178. :param model: Pytorch loaded model object.
  179. If your model's framework is pytorch you may pass the following parameters as kwargs in order to control the conversion to onnx
  180. :param kwargs: Extra arguments to be passed to the PyTorch to ONNX conversion, for example:
  181. opset_version
  182. do_constant_folding
  183. dynamic_axes
  184. input_names
  185. output_names
  186. """
  187. self.lab_client.add_model_v2(model_metadata=model_metadata, hardware_types=hardware_types, model_path=model_path, model=model, **kwargs)
  188. def _move_file_to_folder(src_file_path: str, dest_dir_name: str) -> str:
  189. """Move a file to a newly created folder in the same directory.
  190. :param src_file_path: Path of the file to be moved.
  191. :param dest_dir_name: Name of the destination folder.
  192. :return: The path of the moved file.
  193. """
  194. src_dir_path = os.path.dirname(src_file_path)
  195. dest_dir_path = os.path.join(src_dir_path, dest_dir_name)
  196. dest_file_path = os.path.join(dest_dir_path, os.path.basename(src_file_path))
  197. os.makedirs(dest_dir_path, exist_ok=True)
  198. shutil.copyfile(src_file_path, dest_file_path)
  199. return dest_file_path
Discard
Tip!

Press p to see the previous file, or n to see the next file