Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

#257 allow using an external Optimizer (not initialized outside)

Merged
Ofri Masad merged 1 commit into Deci-AI:master from deci-ai:feature/SG-184_external_optimizer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  1. import os
  2. from super_gradients.common.abstractions.abstract_logger import get_logger
  3. from super_gradients.common.sg_loggers.base_sg_logger import BaseSGLogger
  4. from super_gradients.common.environment.env_helpers import multi_process_safe
  5. from super_gradients.training.params import TrainingParams
  6. logger = get_logger(__name__)
  7. try:
  8. from deci_lab_client.client import DeciPlatformClient
  9. _imported_deci_lab_failure = None
  10. except (ImportError, NameError, ModuleNotFoundError) as import_err:
  11. logger.warn("Failed to import deci_lab_client")
  12. _imported_deci_lab_failure = import_err
  13. TENSORBOARD_EVENTS_PREFIX = 'events.out.tfevents'
  14. LOGS_PREFIX = 'log_'
  15. class DeciPlatformSGLogger(BaseSGLogger):
  16. """Logger responsible to push logs and tensorboard artifacts to Deci platform."""
  17. def __init__(self, **kwargs):
  18. if _imported_deci_lab_failure is not None:
  19. raise _imported_deci_lab_failure
  20. auth_token = os.getenv("DECI_PLATFORM_TOKEN")
  21. if auth_token is None:
  22. raise ValueError('The environment variable "DECI_PLATFORM_TOKEN" is required in order to use '
  23. 'DeciPlatformSGLogger. Please set it with your own credentials '
  24. '(available in https://console.deci.ai/settings)')
  25. super().__init__(**kwargs)
  26. self.platform_client = DeciPlatformClient()
  27. self.platform_client.login(token=auth_token)
  28. self.platform_client.register_experiment(name=kwargs["experiment_name"])
  29. self.checkpoints_dir_path = kwargs["checkpoints_dir_path"]
  30. @multi_process_safe
  31. def upload(self):
  32. """
  33. Upload both to the destination specified by the user (base behavior), and to Deci platform.
  34. """
  35. # Upload to the destination specified by the user
  36. super(DeciPlatformSGLogger, self).upload()
  37. # Upload to Deci platform
  38. if not os.path.isdir(self.checkpoints_dir_path):
  39. raise ValueError('Provided directory does not exist')
  40. self._upload_latest_file_starting_with(start_with=TENSORBOARD_EVENTS_PREFIX)
  41. self._upload_latest_file_starting_with(start_with=LOGS_PREFIX)
  42. @multi_process_safe
  43. def _upload_latest_file_starting_with(self, start_with: str):
  44. """
  45. Upload the most recent file starting with a specific prefix to the Deci platform.
  46. :param start_with: prefix of the file to upload
  47. """
  48. files_path = [
  49. os.path.join(self.checkpoints_dir_path, file_name)
  50. for file_name in os.listdir(self.checkpoints_dir_path)
  51. if file_name.startswith(start_with)
  52. ]
  53. most_recent_file_path = max(files_path, key=os.path.getctime)
  54. self.platform_client.save_experiment_file(file_path=most_recent_file_path)
  55. logger.info(f"File saved to Deci platform: {most_recent_file_path}")
Discard
Tip!

Press p or ← to see the previous file, or n or → to see the next file