Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

test_cuda.py 8.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
from itertools import product
from pathlib import Path

import pytest
import torch

from tests import CUDA_DEVICE_COUNT, CUDA_IS_AVAILABLE, MODEL, SOURCE
from ultralytics import YOLO
from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS
from ultralytics.utils import ASSETS, IS_JETSON, WEIGHTS_DIR
from ultralytics.utils.autodevice import GPUInfo
from ultralytics.utils.checks import check_amp
from ultralytics.utils.torch_utils import TORCH_1_13
  13. # Try to find idle devices if CUDA is available
  14. DEVICES = []
  15. if CUDA_IS_AVAILABLE:
  16. if IS_JETSON:
  17. DEVICES = [0] # NVIDIA Jetson only has one GPU and does not fully support pynvml library
  18. else:
  19. gpu_info = GPUInfo()
  20. gpu_info.print_status()
  21. autodevice_fraction = __import__("os").environ.get("YOLO_AUTODEVICE_FRACTION_FREE", 0.3)
  22. idle_gpus = gpu_info.select_idle_gpu(
  23. count=2, min_memory_fraction=autodevice_fraction, min_util_fraction=autodevice_fraction
  24. )
  25. if idle_gpus:
  26. DEVICES = idle_gpus
  27. def test_checks():
  28. """Validate CUDA settings against torch CUDA functions."""
  29. assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
  30. assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
  31. @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  32. def test_amp():
  33. """Test AMP training checks."""
  34. model = YOLO("yolo11n.pt").model.to(f"cuda:{DEVICES[0]}")
  35. assert check_amp(model)
  36. @pytest.mark.slow
  37. @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  38. @pytest.mark.parametrize(
  39. "task, dynamic, int8, half, batch, simplify, nms",
  40. [ # generate all combinations except for exclusion cases
  41. (task, dynamic, int8, half, batch, simplify, nms)
  42. for task, dynamic, int8, half, batch, simplify, nms in product(
  43. TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
  44. )
  45. if not (
  46. (int8 and half) or (task == "classify" and nms) or (task == "obb" and nms and (not TORCH_1_13 or IS_JETSON))
  47. )
  48. ],
  49. )
  50. def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
  51. """Test YOLO exports to ONNX format with various configurations and parameters."""
  52. file = YOLO(TASK2MODEL[task]).export(
  53. format="onnx",
  54. imgsz=32,
  55. dynamic=dynamic,
  56. int8=int8,
  57. half=half,
  58. batch=batch,
  59. simplify=simplify,
  60. nms=nms and task != "obb", # disable NMS for OBB task for now on T4 instance
  61. device=DEVICES[0],
  62. )
  63. YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32, device=DEVICES[0]) # exported model inference
  64. Path(file).unlink() # cleanup
  65. @pytest.mark.slow
  66. @pytest.mark.skipif(True, reason="CUDA export tests disabled pending additional Ultralytics GPU server availability")
  67. @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  68. @pytest.mark.parametrize(
  69. "task, dynamic, int8, half, batch",
  70. [ # generate all combinations but exclude those where both int8 and half are True
  71. (task, dynamic, int8, half, batch)
  72. # Note: tests reduced below pending compute availability expansion as GPU CI runner utilization is high
  73. # for task, dynamic, int8, half, batch in product(TASKS, [True, False], [True, False], [True, False], [1, 2])
  74. for task, dynamic, int8, half, batch in product(TASKS, [True], [True], [False], [2])
  75. if not (int8 and half) # exclude cases where both int8 and half are True
  76. ],
  77. )
  78. def test_export_engine_matrix(task, dynamic, int8, half, batch):
  79. """Test YOLO model export to TensorRT format for various configurations and run inference."""
  80. file = YOLO(TASK2MODEL[task]).export(
  81. format="engine",
  82. imgsz=32,
  83. dynamic=dynamic,
  84. int8=int8,
  85. half=half,
  86. batch=batch,
  87. data=TASK2DATA[task],
  88. workspace=1, # reduce workspace GB for less resource utilization during testing
  89. simplify=True,
  90. device=DEVICES[0],
  91. )
  92. YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32, device=DEVICES[0]) # exported model inference
  93. Path(file).unlink() # cleanup
  94. Path(file).with_suffix(".cache").unlink() if int8 else None # cleanup INT8 cache
  95. @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  96. def test_train():
  97. """Test model training on a minimal dataset using available CUDA devices."""
  98. import os
  99. device = tuple(DEVICES) if len(DEVICES) > 1 else DEVICES[0]
  100. results = YOLO(MODEL).train(data="coco8.yaml", imgsz=64, epochs=1, device=device) # requires imgsz>=64
  101. # NVIDIA Jetson only has one GPU and therefore skipping checks
  102. if not IS_JETSON:
  103. visible = eval(os.environ["CUDA_VISIBLE_DEVICES"])
  104. assert visible == device, f"Passed GPUs '{device}', but used GPUs '{visible}'"
  105. assert (
  106. (results is None) if len(DEVICES) > 1 else (results is not None)
  107. ) # DDP returns None, single-GPU returns metrics
  108. @pytest.mark.slow
  109. @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  110. def test_predict_multiple_devices():
  111. """Validate model prediction consistency across CPU and CUDA devices."""
  112. model = YOLO("yolo11n.pt")
  113. # Test CPU
  114. model = model.cpu()
  115. assert str(model.device) == "cpu"
  116. _ = model(SOURCE)
  117. assert str(model.device) == "cpu"
  118. # Test CUDA
  119. cuda_device = f"cuda:{DEVICES[0]}"
  120. model = model.to(cuda_device)
  121. assert str(model.device) == cuda_device
  122. _ = model(SOURCE)
  123. assert str(model.device) == cuda_device
  124. # Test CPU again
  125. model = model.cpu()
  126. assert str(model.device) == "cpu"
  127. _ = model(SOURCE)
  128. assert str(model.device) == "cpu"
  129. # Test CUDA again
  130. model = model.to(cuda_device)
  131. assert str(model.device) == cuda_device
  132. _ = model(SOURCE)
  133. assert str(model.device) == cuda_device
  134. @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  135. def test_autobatch():
  136. """Check optimal batch size for YOLO model training using autobatch utility."""
  137. from ultralytics.utils.autobatch import check_train_batch_size
  138. check_train_batch_size(YOLO(MODEL).model.to(f"cuda:{DEVICES[0]}"), imgsz=128, amp=True)
  139. @pytest.mark.slow
  140. @pytest.mark.skipif(True, reason="Skip for now since T4 instance does not support TensorRT > 10.0")
  141. def test_utils_benchmarks():
  142. """Profile YOLO models for performance benchmarks."""
  143. from ultralytics.utils.benchmarks import ProfileModels
  144. # Pre-export a dynamic engine model to use dynamic inference
  145. YOLO(MODEL).export(format="engine", imgsz=32, dynamic=True, batch=1, device=DEVICES[0])
  146. ProfileModels(
  147. [MODEL],
  148. imgsz=32,
  149. half=False,
  150. min_time=1,
  151. num_timed_runs=3,
  152. num_warmup_runs=1,
  153. device=DEVICES[0],
  154. ).run()
  155. @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  156. def test_predict_sam():
  157. """Test SAM model predictions using different prompts."""
  158. from ultralytics import SAM
  159. from ultralytics.models.sam import Predictor as SAMPredictor
  160. model = SAM(WEIGHTS_DIR / "sam2.1_b.pt")
  161. model.info()
  162. # Run inference with various prompts
  163. model(SOURCE, device=DEVICES[0])
  164. model(SOURCE, bboxes=[439, 437, 524, 709], device=DEVICES[0])
  165. model(ASSETS / "zidane.jpg", points=[900, 370], device=DEVICES[0])
  166. model(ASSETS / "zidane.jpg", points=[900, 370], labels=[1], device=DEVICES[0])
  167. model(ASSETS / "zidane.jpg", points=[[900, 370]], labels=[1], device=DEVICES[0])
  168. model(ASSETS / "zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1], device=DEVICES[0])
  169. model(ASSETS / "zidane.jpg", points=[[[900, 370], [1000, 100]]], labels=[[1, 1]], device=DEVICES[0])
  170. # Test predictor
  171. predictor = SAMPredictor(
  172. overrides=dict(
  173. conf=0.25,
  174. task="segment",
  175. mode="predict",
  176. imgsz=1024,
  177. model=WEIGHTS_DIR / "mobile_sam.pt",
  178. device=DEVICES[0],
  179. )
  180. )
  181. predictor.set_image(ASSETS / "zidane.jpg")
  182. # predictor(bboxes=[439, 437, 524, 709])
  183. # predictor(points=[900, 370], labels=[1])
  184. predictor.reset_image()
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...