Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

crash_tips_test.py 4.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  1. import sys
  2. import unittest
  3. import dataclasses
  4. from typing import Type
  5. import omegaconf
  6. from super_gradients.common.crash_handler.crash_tips import (
  7. get_relevant_crash_tip_message,
  8. CrashTip,
  9. TorchCudaMissingTip,
  10. RecipeFactoryFormatTip,
  11. DDPNotInitializedTip,
  12. WrongHydraVersionTip,
  13. InterpolationKeyErrorTip,
  14. )
  15. @dataclasses.dataclass
  16. class DocumentedException:
  17. exc_value: Exception
  18. expected_crash_tip: Type[CrashTip]
  19. # author/person who faced this exception?
  20. class CrashTipTest(unittest.TestCase):
  21. def setUp(self) -> None:
  22. # Add any exception that we want to support here to make sure that it will be handled by our crash tip handler
  23. self.documented_exceptions = [
  24. DocumentedException(
  25. exc_value=OSError(
  26. "/home/tomer.keren/.conda/envs/tomer-dev-sg3/lib/python3.10/site-packages/torch/lib/../../nvidia/cublas/lib/libcublas.so.11: symbol "
  27. "cublasLtHSHMatmulAlgoInit version libcublasLt.so.11 not defined in file libcublasLt.so.11 with link time reference"
  28. ),
  29. expected_crash_tip=TorchCudaMissingTip,
  30. ),
  31. DocumentedException(
  32. exc_value=RuntimeError(
  33. "Malformed object definition in configuration. Expecting either a string of object type or a single entry dictionary{type_name(str): "
  34. "{parameters...}}.received: {'my_callback': None, 'lr_step': 2.4}"
  35. ),
  36. expected_crash_tip=RecipeFactoryFormatTip,
  37. ),
  38. DocumentedException(
  39. exc_value=RuntimeError("Default process group has not been initialized, please make sure to call init_process_group."),
  40. expected_crash_tip=DDPNotInitializedTip,
  41. ),
  42. DocumentedException(
  43. exc_value=TypeError("__init__() got an unexpected keyword argument 'version_base'"),
  44. expected_crash_tip=WrongHydraVersionTip,
  45. ),
  46. DocumentedException(
  47. exc_value=omegaconf.errors.InterpolationKeyError("omegaconf.errors.InterpolationKeyError: Interpolation key 'x' not found"),
  48. expected_crash_tip=InterpolationKeyErrorTip,
  49. ),
  50. ]
  51. def test_found_exceptions(self):
  52. """Test all the exceptions that were documented, and make sure that they have an associated tip."""
  53. for documented_exception in self.documented_exceptions:
  54. exc_value, expected_crash_tip = documented_exception.exc_value, documented_exception.expected_crash_tip
  55. try:
  56. raise exc_value
  57. except type(exc_value):
  58. exc_type, exc_value, exc_traceback = sys.exc_info()
  59. with self.subTest(
  60. msg="Making sure that the CrashTip is considered relevant for the exception...",
  61. expected_tip=expected_crash_tip.__name__,
  62. exception=exc_value,
  63. ):
  64. is_relevant = expected_crash_tip.is_relevant(exc_type, exc_value, exc_traceback)
  65. self.assertTrue(
  66. is_relevant,
  67. msg=f"Crash tip '{expected_crash_tip.__name__}' should be relevant for exception '{exc_type.__name__}' but failed.",
  68. )
  69. with self.subTest(
  70. msg="Making sure that the CrashTip generates a message (None is returned if an error is raised internally, to avoid crashing atexit)...",
  71. crash_tip=expected_crash_tip.__name__,
  72. ):
  73. crash_tip_msg = expected_crash_tip.get_message(exc_type, exc_value, exc_traceback)
  74. self.assertIsNotNone(
  75. crash_tip_msg,
  76. msg=f"The crash tip '{expected_crash_tip.__name__}' returned None, "
  77. f"an exception was probably raised in '{expected_crash_tip.__name__}.get_message(...)'",
  78. )
  79. with self.subTest(
  80. msg="Making sure that we can find the relevant CrashTip and get it's summary for the exception...",
  81. expected_tip=expected_crash_tip.__name__,
  82. exception=exc_value,
  83. ):
  84. crash_tip_message = get_relevant_crash_tip_message(exc_type, exc_value, exc_traceback)
  85. expected_crash_tip_message = expected_crash_tip.get_message(exc_type, exc_value, exc_traceback)
  86. self.assertEqual(
  87. crash_tip_message,
  88. expected_crash_tip_message,
  89. msg=f"Crash tip message should be '{expected_crash_tip_message}' but got '{crash_tip_message}' instead.",
  90. )
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...