#21009 `ultralytics 8.3.154` Refactor `Validator` and `Metrics` classes

Merged
Ghost merged 1 commit into Ultralytics:main from ultralytics:validator-cleanup
@@ -143,8 +143,11 @@ To train a YOLO11 model using JupyterLab:
 5. Visualize training results using JupyterLab's built-in plotting capabilities:
 
     ```python
-    %matplotlib inline
+    import matplotlib
+
     from ultralytics.utils.plotting import plot_results
+
+    matplotlib.use("inline")  # or 'notebook' for interactive
     plot_results(results)
     ```
 
@@ -325,7 +325,7 @@ To use YOLOv7 ONNX model with Ultralytics:
 
 2. Install the `TensorRT` Python package:
 
-    ```python
+    ```bash
     pip install tensorrt
     ```
 
@@ -43,14 +43,6 @@ keywords: ultralytics, plotting, utilities, documentation, data visualization, a
 
 <br><br><hr><br>
 
-## ::: ultralytics.utils.plotting.output_to_target
-
-<br><br><hr><br>
-
-## ::: ultralytics.utils.plotting.output_to_rotated_target
-
-<br><br><hr><br>
-
 ## ::: ultralytics.utils.plotting.feature_visualization
 
 <br><br>
@@ -76,16 +76,21 @@ Train YOLO11n-cls on the MNIST160 dataset for 100 [epochs](https://www.ultralyti
 
     Ultralytics YOLO classification uses [torchvision.transforms.RandomResizedCrop](https://docs.pytorch.org/vision/stable/generated/torchvision.transforms.RandomResizedCrop.html) for training augmentation and [torchvision.transforms.CenterCrop](https://docs.pytorch.org/vision/stable/generated/torchvision.transforms.CenterCrop.html) for validation/inference.
     For images with extreme aspect ratios, consider using [torchvision.transforms.Resize](https://docs.pytorch.org/vision/stable/generated/torchvision.transforms.Resize.html) instead. The example below shows how to customize augmentations for classification training.
+
     ```python
     import torch
     import torchvision.transforms as T
 
+    from ultralytics import YOLO
     from ultralytics.data.dataset import ClassificationDataset
     from ultralytics.models.yolo.classify import ClassificationTrainer
 
 
     class CustomizedDataset(ClassificationDataset):
+        """A customized dataset class for image classification with enhanced data augmentation transforms."""
+
         def __init__(self, root: str, args, augment: bool = False, prefix: str = ""):
+            """Initialize a customized classification dataset with enhanced data augmentation transforms."""
             super().__init__(root, args, augment, prefix)
             train_transforms = T.Compose(
                 [
@@ -110,12 +115,13 @@ Train YOLO11n-cls on the MNIST160 dataset for 100 [epochs](https://www.ultralyti
 
 
     class CustomizedTrainer(ClassificationTrainer):
+        """A customized trainer class for YOLO classification models with enhanced dataset handling."""
+
         def build_dataset(self, img_path: str, mode: str = "train", batch=None):
+            """Build a customized dataset for classification training or validation."""
             return CustomizedDataset(root=img_path, args=self.args, augment=mode == "train", prefix=mode)
 
 
-    from ultralytics import YOLO
-
     model = YOLO("yolo11n-cls.pt")
     model.train(data="imagenet1000", trainer=CustomizedTrainer, epochs=10, imgsz=224, batch=64)
     ```
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-__version__ = "8.3.153"
+__version__ = "8.3.154"
 
 import os
 
@@ -82,7 +82,6 @@ class BaseValidator:
         update_metrics: Update metrics based on predictions and batch.
         finalize_metrics: Finalize and return all metrics.
         get_stats: Return statistics about the model's performance.
-        check_stats: Check statistics.
         print_results: Print the results of the model's predictions.
         get_desc: Get description of the YOLO model.
         on_plot: Register plots for visualization.
@@ -226,7 +225,6 @@
 
             self.run_callbacks("on_val_batch_end")
         stats = self.get_stats()
-        self.check_stats(stats)
         self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt)))
         self.finalize_metrics()
         self.print_results()
@@ -334,10 +332,6 @@
         """Return statistics about the model's performance."""
         return {}
 
-    def check_stats(self, stats):
-        """Check statistics."""
-        pass
-
     def print_results(self):
         """Print the results of the model's predictions."""
         pass
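
Since the `check_stats` hook is removed and the validation loop now goes straight from `get_stats` to `finalize_metrics`, any subclass that overrode it can fold the same sanity check into `get_stats`. A minimal sketch of that pattern; `MyValidator` and its stats dict are hypothetical:

```python
from ultralytics.engine.validator import BaseValidator


class MyValidator(BaseValidator):
    """Hypothetical subclass showing where former check_stats() logic can move."""

    def get_stats(self):
        stats = {"metrics/precision": 0.5}  # assumed stats dict, for illustration only
        # The check that previously lived in check_stats() now runs here,
        # since the loop still calls get_stats() directly.
        assert all(v >= 0 for v in stats.values()), "unexpected negative metric"
        return stats
```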
@@ -1,7 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 from ultralytics.models.yolo.segment import SegmentationValidator
-from ultralytics.utils.metrics import SegmentMetrics
 
 
 class FastSAMValidator(SegmentationValidator):
@@ -39,4 +38,3 @@ class FastSAMValidator(SegmentationValidator):
         super().__init__(dataloader, save_dir, args, _callbacks)
         self.args.task = "segment"
         self.args.plots = False  # disable ConfusionMatrix and other plots to avoid errors
-        self.metrics = SegmentMetrics(save_dir=self.save_dir)
@@ -1,5 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from typing import Any, Dict, List, Tuple, Union
+
 import torch
 
 from ultralytics.data import YOLODataset
@@ -151,15 +153,21 @@ class RTDETRValidator(DetectionValidator):
             data=self.data,
         )
 
-    def postprocess(self, preds):
+    def postprocess(
+        self, preds: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]]
+    ) -> List[Dict[str, torch.Tensor]]:
         """
         Apply Non-maximum suppression to prediction outputs.
 
         Args:
-            preds (list | tuple | torch.Tensor): Raw predictions from the model.
+            preds (torch.Tensor | List | Tuple): Raw predictions from the model. If tensor, should have shape
+                (batch_size, num_predictions, num_classes + 4) where last dimension contains bbox coords and class scores.
 
         Returns:
-            (list[torch.Tensor]): List of processed predictions for each image in batch.
+            (List[Dict[str, torch.Tensor]]): List of dictionaries for each image, each containing:
+                - 'bboxes': Tensor of shape (N, 4) with bounding box coordinates
+                - 'conf': Tensor of shape (N,) with confidence scores
+                - 'cls': Tensor of shape (N,) with class indices
         """
         if not isinstance(preds, (list, tuple)):  # list for PyTorch inference but list[0] Tensor for export inference
             preds = [preds, None]
@@ -176,18 +184,19 @@ class RTDETRValidator(DetectionValidator):
             pred = pred[score.argsort(descending=True)]
             outputs[i] = pred[score > self.args.conf]
 
-        return outputs
+        return [{"bboxes": x[:, :4], "conf": x[:, 4], "cls": x[:, 5]} for x in outputs]
 
-    def _prepare_batch(self, si, batch):
+    def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
         Prepare a batch for validation by applying necessary transformations.
 
         Args:
             si (int): Batch index.
-            batch (dict): Batch data containing images and annotations.
+            batch (Dict[str, Any]): Batch data containing images and annotations.
 
         Returns:
-            (dict): Prepared batch with transformed annotations.
+            (Dict[str, Any]): Prepared batch with transformed annotations containing cls, bboxes,
+                ori_shape, imgsz, and ratio_pad.
         """
         idx = batch["batch_idx"] == si
         cls = batch["cls"][idx].squeeze(-1)
@@ -199,20 +208,23 @@ class RTDETRValidator(DetectionValidator):
             bbox = ops.xywh2xyxy(bbox)  # target boxes
             bbox[..., [0, 2]] *= ori_shape[1]  # native-space pred
             bbox[..., [1, 3]] *= ori_shape[0]  # native-space pred
-        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
+        return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
 
-    def _prepare_pred(self, pred, pbatch):
+    def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
         """
         Prepare predictions by scaling bounding boxes to original image dimensions.
 
         Args:
-            pred (torch.Tensor): Raw predictions.
-            pbatch (dict): Prepared batch information.
+            pred (Dict[str, torch.Tensor]): Raw predictions containing 'cls', 'bboxes', and 'conf'.
+            pbatch (Dict[str, torch.Tensor]): Prepared batch information containing 'ori_shape' and other metadata.
 
         Returns:
-            (torch.Tensor): Predictions scaled to original image dimensions.
+            (Dict[str, torch.Tensor]): Predictions scaled to original image dimensions.
         """
-        predn = pred.clone()
-        predn[..., [0, 2]] *= pbatch["ori_shape"][1] / self.args.imgsz  # native-space pred
-        predn[..., [1, 3]] *= pbatch["ori_shape"][0] / self.args.imgsz  # native-space pred
-        return predn.float()
+        cls = pred["cls"]
+        if self.args.single_cls:
+            cls *= 0
+        bboxes = pred["bboxes"].clone()
+        bboxes[..., [0, 2]] *= pbatch["ori_shape"][1] / self.args.imgsz  # native-space pred
+        bboxes[..., [1, 3]] *= pbatch["ori_shape"][0] / self.args.imgsz  # native-space pred
+        return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}
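
The new `postprocess` contract above returns one dict per image instead of a raw tensor. A minimal sketch of consuming that output, using a fake prediction list in the documented format (shapes and threshold are illustrative):

```python
import torch

# Fake postprocess() output: one dict per image with "bboxes", "conf", "cls".
preds = [{"bboxes": torch.rand(8, 4), "conf": torch.rand(8), "cls": torch.randint(0, 80, (8,)).float()}]
for det in preds:
    keep = det["conf"] > 0.5  # e.g. keep only confident detections
    print(det["bboxes"][keep].shape, det["cls"][keep].tolist())
```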
@@ -1,5 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from pathlib import Path
+from typing import Any, Dict, List, Tuple, Union
+
 import torch
 
 from ultralytics.data import ClassificationDataset, build_dataloader
@@ -48,7 +51,7 @@ class ClassificationValidator(BaseValidator):
         Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'.
     """
 
-    def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None):
+    def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None) -> None:
         """
         Initialize ClassificationValidator with dataloader, save directory, and other parameters.
 
@@ -70,28 +73,26 @@ class ClassificationValidator(BaseValidator):
         self.args.task = "classify"
         self.metrics = ClassifyMetrics()
 
-    def get_desc(self):
+    def get_desc(self) -> str:
         """Return a formatted string summarizing classification metrics."""
         return ("%22s" + "%11s" * 2) % ("classes", "top1_acc", "top5_acc")
 
-    def init_metrics(self, model):
+    def init_metrics(self, model: torch.nn.Module) -> None:
         """Initialize confusion matrix, class names, and tracking containers for predictions and targets."""
         self.names = model.names
         self.nc = len(model.names)
-        self.confusion_matrix = ConfusionMatrix(
-            nc=self.nc, conf=self.args.conf, names=self.names.values(), task="classify"
-        )
         self.pred = []
         self.targets = []
+        self.confusion_matrix = ConfusionMatrix(names=list(model.names.values()))
 
-    def preprocess(self, batch):
+    def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
         """Preprocess input batch by moving data to device and converting to appropriate dtype."""
         batch["img"] = batch["img"].to(self.device, non_blocking=True)
         batch["img"] = batch["img"].half() if self.args.half else batch["img"].float()
         batch["cls"] = batch["cls"].to(self.device)
         return batch
 
-    def update_metrics(self, preds, batch):
+    def update_metrics(self, preds: torch.Tensor, batch: Dict[str, Any]) -> None:
         """
         Update running metrics with model predictions and batch targets.
 
@@ -127,23 +128,23 @@ class ClassificationValidator(BaseValidator):
             for normalize in True, False:
                 self.confusion_matrix.plot(save_dir=self.save_dir, normalize=normalize, on_plot=self.on_plot)
         self.metrics.speed = self.speed
-        self.metrics.confusion_matrix = self.confusion_matrix
         self.metrics.save_dir = self.save_dir
+        self.metrics.confusion_matrix = self.confusion_matrix
 
-    def postprocess(self, preds):
+    def postprocess(self, preds: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]]) -> torch.Tensor:
         """Extract the primary prediction from model output if it's in a list or tuple format."""
         return preds[0] if isinstance(preds, (list, tuple)) else preds
 
-    def get_stats(self):
+    def get_stats(self) -> Dict[str, float]:
         """Calculate and return a dictionary of metrics by processing targets and predictions."""
         self.metrics.process(self.targets, self.pred)
         return self.metrics.results_dict
 
-    def build_dataset(self, img_path):
+    def build_dataset(self, img_path: str) -> ClassificationDataset:
         """Create a ClassificationDataset instance for validation."""
         return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)
 
-    def get_dataloader(self, dataset_path, batch_size):
+    def get_dataloader(self, dataset_path: Union[Path, str], batch_size: int) -> torch.utils.data.DataLoader:
         """
         Build and return a data loader for classification validation.
 
@@ -157,17 +158,17 @@ class ClassificationValidator(BaseValidator):
         dataset = self.build_dataset(dataset_path)
         return build_dataloader(dataset, batch_size, self.args.workers, rank=-1)
 
-    def print_results(self):
+    def print_results(self) -> None:
         """Print evaluation metrics for the classification model."""
         pf = "%22s" + "%11.3g" * len(self.metrics.keys)  # print format
         LOGGER.info(pf % ("all", self.metrics.top1, self.metrics.top5))
 
-    def plot_val_samples(self, batch, ni):
+    def plot_val_samples(self, batch: Dict[str, Any], ni: int) -> None:
         """
         Plot validation image samples with their ground truth labels.
 
         Args:
-            batch (dict): Dictionary containing batch data with 'img' (images) and 'cls' (class labels).
+            batch (Dict[str, Any]): Dictionary containing batch data with 'img' (images) and 'cls' (class labels).
             ni (int): Batch index used for naming the output file.
 
         Examples:
@@ -175,21 +176,20 @@ class ClassificationValidator(BaseValidator):
            >>> batch = {"img": torch.rand(16, 3, 224, 224), "cls": torch.randint(0, 10, (16,))}
            >>> validator.plot_val_samples(batch, 0)
        """
+        batch["batch_idx"] = torch.arange(len(batch["img"]))  # add batch index for plotting
         plot_images(
-            images=batch["img"],
-            batch_idx=torch.arange(len(batch["img"])),
-            cls=batch["cls"].view(-1),  # warning: use .view(), not .squeeze() for Classify models
+            labels=batch,
             fname=self.save_dir / f"val_batch{ni}_labels.jpg",
             names=self.names,
             on_plot=self.on_plot,
         )
 
-    def plot_predictions(self, batch, preds, ni):
+    def plot_predictions(self, batch: Dict[str, Any], preds: torch.Tensor, ni: int) -> None:
         """
         Plot images with their predicted class labels and save the visualization.
 
         Args:
-            batch (dict): Batch data containing images and other information.
+            batch (Dict[str, Any]): Batch data containing images and other information.
             preds (torch.Tensor): Model predictions with shape (batch_size, num_classes).
             ni (int): Batch index used for naming the output file.
 
@@ -199,10 +199,13 @@ class ClassificationValidator(BaseValidator):
             >>> preds = torch.rand(16, 10)  # 16 images, 10 classes
             >>> validator.plot_predictions(batch, preds, 0)
         """
-        plot_images(
-            batch["img"],
+        batched_preds = dict(
+            img=batch["img"],
             batch_idx=torch.arange(len(batch["img"])),
             cls=torch.argmax(preds, dim=1),
+        )
+        plot_images(
+            batched_preds,
             fname=self.save_dir / f"val_batch{ni}_pred.jpg",
             names=self.names,
             on_plot=self.on_plot,
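
Both plotting methods now hand `plot_images` a single labels dict instead of separate `images`/`batch_idx`/`cls` arguments. A small sketch of the new call shape with a dummy batch; shapes, file name, and class names are made up, and argument handling beyond what the diff shows is assumed:

```python
import torch

from ultralytics.utils.plotting import plot_images

# Dummy 16-image batch; the keys mirror the dicts built in the diff above.
labels = {
    "img": torch.rand(16, 3, 224, 224),
    "batch_idx": torch.arange(16),
    "cls": torch.randint(0, 10, (16,)),
}
plot_images(labels, fname="val_batch0_pred.jpg", names={i: f"class_{i}" for i in range(10)})
```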
@@ -3,7 +3,7 @@
 import math
 import random
 from copy import copy
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 import numpy as np
 import torch.nn as nn
@@ -178,19 +178,16 @@ class DetectionTrainer(BaseTrainer):
             "Size",
         )
 
-    def plot_training_samples(self, batch: Dict, ni: int):
+    def plot_training_samples(self, batch: Dict[str, Any], ni: int) -> None:
         """
         Plot training samples with their annotations.
 
         Args:
-            batch (Dict): Dictionary containing batch data.
+            batch (Dict[str, Any]): Dictionary containing batch data.
             ni (int): Number of iterations.
         """
         plot_images(
-            images=batch["img"],
-            batch_idx=batch["batch_idx"],
-            cls=batch["cls"].squeeze(-1),
-            bboxes=batch["bboxes"],
+            labels=batch,
             paths=batch["im_file"],
             fname=self.save_dir / f"train_batch{ni}.jpg",
             on_plot=self.on_plot,
@@ -12,7 +12,7 @@ from ultralytics.engine.validator import BaseValidator
 from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.metrics import ConfusionMatrix, DetMetrics, box_iou
-from ultralytics.utils.plotting import output_to_target, plot_images
+from ultralytics.utils.plotting import plot_images
 
 
 class DetectionValidator(BaseValidator):
@@ -23,8 +23,6 @@ class DetectionValidator(BaseValidator):
     prediction processing, and visualization of results.
 
     Attributes:
-        nt_per_class (np.ndarray): Number of targets per class.
-        nt_per_image (np.ndarray): Number of targets per image.
         is_coco (bool): Whether the dataset is COCO.
         is_lvis (bool): Whether the dataset is LVIS.
         class_map (List[int]): Mapping from model class indices to dataset class indices.
@@ -53,15 +51,13 @@ class DetectionValidator(BaseValidator):
             _callbacks (List[Any], optional): List of callback functions.
         """
         super().__init__(dataloader, save_dir, args, _callbacks)
-        self.nt_per_class = None
-        self.nt_per_image = None
         self.is_coco = False
         self.is_lvis = False
         self.class_map = None
         self.args.task = "detect"
-        self.metrics = DetMetrics(save_dir=self.save_dir)
         self.iouv = torch.linspace(0.5, 0.95, 10)  # IoU vector for mAP@0.5:0.95
         self.niou = self.iouv.numel()
+        self.metrics = DetMetrics()
 
     def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -99,18 +95,16 @@
         self.names = model.names
         self.nc = len(model.names)
         self.end2end = getattr(model, "end2end", False)
-        self.metrics.names = self.names
-        self.metrics.plot = self.args.plots
-        self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf, names=self.names.values())
         self.seen = 0
         self.jdict = []
-        self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
+        self.metrics.names = self.names
+        self.confusion_matrix = ConfusionMatrix(names=list(model.names.values()))
 
     def get_desc(self) -> str:
         """Return a formatted string summarizing class metrics of YOLO model."""
         return ("%22s" + "%11s" * 6) % ("Class", "Images", "Instances", "Box(P", "R", "mAP50", "mAP50-95)")
 
-    def postprocess(self, preds: torch.Tensor) -> List[torch.Tensor]:
+    def postprocess(self, preds: torch.Tensor) -> List[Dict[str, torch.Tensor]]:
         """
         Apply Non-maximum suppression to prediction outputs.
 
@@ -118,9 +112,10 @@
             preds (torch.Tensor): Raw predictions from the model.
 
         Returns:
-            (List[torch.Tensor]): Processed predictions after NMS.
+            (List[Dict[str, torch.Tensor]]): Processed predictions after NMS, where each dict contains
+                'bboxes', 'conf', 'cls', and 'extra' tensors.
         """
-        return ops.non_max_suppression(
+        outputs = ops.non_max_suppression(
             preds,
             self.args.conf,
             self.args.iou,
@@ -131,6 +126,7 @@
             end2end=self.end2end,
             rotated=self.args.task == "obb",
         )
+        return [{"bboxes": x[:, :4], "conf": x[:, 4], "cls": x[:, 5], "extra": x[:, 6:]} for x in outputs]
 
     def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -152,68 +148,60 @@
         if len(cls):
             bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
-        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
+        return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
 
-    def _prepare_pred(self, pred: torch.Tensor, pbatch: Dict[str, Any]) -> torch.Tensor:
+    def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
         """
         Prepare predictions for evaluation against ground truth.
 
         Args:
-            pred (torch.Tensor): Model predictions.
+            pred (Dict[str, torch.Tensor]): Post-processed predictions from the model.
             pbatch (Dict[str, Any]): Prepared batch information.
 
         Returns:
-            (torch.Tensor): Prepared predictions in native space.
-        """
-        predn = pred.clone()
-        ops.scale_boxes(
-            pbatch["imgsz"], predn[:, :4], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
+            (Dict[str, torch.Tensor]): Prepared predictions in native space.
+        """
+        cls = pred["cls"]
+        if self.args.single_cls:
+            cls *= 0
+        # predn = pred.clone()
+        bboxes = ops.scale_boxes(
+            pbatch["imgsz"], pred["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
         )  # native-space pred
-        return predn
+        return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}
 
-    def update_metrics(self, preds: List[torch.Tensor], batch: Dict[str, Any]) -> None:
+    def update_metrics(self, preds: List[Dict[str, torch.Tensor]], batch: Dict[str, Any]) -> None:
         """
         Update metrics with new predictions and ground truth.
 
         Args:
-            preds (List[torch.Tensor]): List of predictions from the model.
+            preds (List[Dict[str, torch.Tensor]]): List of predictions from the model.
             batch (Dict[str, Any]): Batch data containing ground truth.
         """
         for si, pred in enumerate(preds):
             self.seen += 1
-            npr = len(pred)
-            stat = dict(
-                conf=torch.zeros(0, device=self.device),
-                pred_cls=torch.zeros(0, device=self.device),
-                tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
-            )
             pbatch = self._prepare_batch(si, batch)
-            cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox")
-            nl = len(cls)
-            stat["target_cls"] = cls
-            stat["target_img"] = cls.unique()
-            if npr == 0:
-                if nl:
-                    for k in self.stats.keys():
-                        self.stats[k].append(stat[k])
-                    if self.args.plots:
-                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
-                continue
-
-            # Predictions
-            if self.args.single_cls:
-                pred[:, 5] = 0
             predn = self._prepare_pred(pred, pbatch)
-            stat["conf"] = predn[:, 4]
-            stat["pred_cls"] = predn[:, 5]
 
+            cls = pbatch["cls"].cpu().numpy()
+            no_pred = len(predn["cls"]) == 0
+            if no_pred and len(cls) == 0:
+                continue
+            self.metrics.update_stats(
+                {
+                    **self._process_batch(predn, pbatch),
+                    "target_cls": cls,
+                    "target_img": np.unique(cls),
+                    "conf": np.zeros(0) if no_pred else predn["conf"].cpu().numpy(),
+                    "pred_cls": np.zeros(0) if no_pred else predn["cls"].cpu().numpy(),
+                }
+            )
             # Evaluate
-            if nl:
-                stat["tp"] = self._process_batch(predn, bbox, cls)
             if self.args.plots:
-                self.confusion_matrix.process_batch(predn, bbox, cls)
-            for k in self.stats.keys():
-                self.stats[k].append(stat[k])
+                self.confusion_matrix.process_batch(predn, pbatch, conf=self.args.conf)
+
+            if no_pred:
+                continue
 
             # Save
             if self.args.save_json:
@@ -241,44 +229,45 @@
         Returns:
             (Dict[str, Any]): Dictionary containing metrics results.
         """
-        stats = {k: torch.cat(v, 0).cpu().numpy() for k, v in self.stats.items()}  # to numpy
-        self.nt_per_class = np.bincount(stats["target_cls"].astype(int), minlength=self.nc)
-        self.nt_per_image = np.bincount(stats["target_img"].astype(int), minlength=self.nc)
-        stats.pop("target_img", None)
-        if len(stats):
-            self.metrics.process(**stats, on_plot=self.on_plot)
+        self.metrics.process(save_dir=self.save_dir, plot=self.args.plots, on_plot=self.on_plot)
+        self.metrics.clear_stats()
         return self.metrics.results_dict
 
     def print_results(self) -> None:
         """Print training/validation set metrics per class."""
         pf = "%22s" + "%11i" * 2 + "%11.3g" * len(self.metrics.keys)  # print format
-        LOGGER.info(pf % ("all", self.seen, self.nt_per_class.sum(), *self.metrics.mean_results()))
-        if self.nt_per_class.sum() == 0:
+        LOGGER.info(pf % ("all", self.seen, self.metrics.nt_per_class.sum(), *self.metrics.mean_results()))
+        if self.metrics.nt_per_class.sum() == 0:
             LOGGER.warning(f"no labels found in {self.args.task} set, can not compute metrics without labels")
 
         # Print results per class
-        if self.args.verbose and not self.training and self.nc > 1 and len(self.stats):
+        if self.args.verbose and not self.training and self.nc > 1 and len(self.metrics.stats):
             for i, c in enumerate(self.metrics.ap_class_index):
                 LOGGER.info(
-                    pf % (self.names[c], self.nt_per_image[c], self.nt_per_class[c], *self.metrics.class_result(i))
+                    pf
+                    % (
+                        self.names[c],
+                        self.metrics.nt_per_image[c],
+                        self.metrics.nt_per_class[c],
+                        *self.metrics.class_result(i),
+                    )
                 )
 
-    def _process_batch(self, detections: torch.Tensor, gt_bboxes: torch.Tensor, gt_cls: torch.Tensor) -> torch.Tensor:
+    def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, Any]) -> Dict[str, np.ndarray]:
         """
         Return correct prediction matrix.
 
         Args:
-            detections (torch.Tensor): Tensor of shape (N, 6) representing detections where each detection is
-                (x1, y1, x2, y2, conf, class).
-            gt_bboxes (torch.Tensor): Tensor of shape (M, 4) representing ground-truth bounding box coordinates. Each
-                bounding box is of the format: (x1, y1, x2, y2).
-            gt_cls (torch.Tensor): Tensor of shape (M,) representing target class indices.
+            preds (Dict[str, torch.Tensor]): Dictionary containing prediction data with 'bboxes' and 'cls' keys.
+            batch (Dict[str, Any]): Batch dictionary containing ground truth data with 'bboxes' and 'cls' keys.
 
         Returns:
-            (torch.Tensor): Correct prediction matrix of shape (N, 10) for 10 IoU levels.
+            (Dict[str, np.ndarray]): Dictionary containing 'tp' key with correct prediction matrix of shape (N, 10) for 10 IoU levels.
         """
-        iou = box_iou(gt_bboxes, detections[:, :4])
-        return self.match_predictions(detections[:, 5], gt_cls, iou)
+        if len(batch["cls"]) == 0 or len(preds["cls"]) == 0:
+            return {"tp": np.zeros((len(preds["cls"]), self.niou), dtype=bool)}
+        iou = box_iou(batch["bboxes"], preds["bboxes"])
+        return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
 
     def build_dataset(self, img_path: str, mode: str = "val", batch: Optional[int] = None) -> torch.utils.data.Dataset:
         """
@@ -317,42 +306,50 @@
             ni (int): Batch index.
         """
         plot_images(
-            batch["img"],
-            batch["batch_idx"],
-            batch["cls"].squeeze(-1),
-            batch["bboxes"],
+            labels=batch,
             paths=batch["im_file"],
             fname=self.save_dir / f"val_batch{ni}_labels.jpg",
             names=self.names,
             on_plot=self.on_plot,
         )
 
-    def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int) -> None:
+    def plot_predictions(
+        self, batch: Dict[str, Any], preds: List[Dict[str, torch.Tensor]], ni: int, max_det: Optional[int] = None
+    ) -> None:
         """
         Plot predicted bounding boxes on input images and save the result.
 
         Args:
             batch (Dict[str, Any]): Batch containing images and annotations.
-            preds (List[torch.Tensor]): List of predictions from the model.
+            preds (List[Dict[str, torch.Tensor]]): List of predictions from the model.
             ni (int): Batch index.
-        """
+            max_det (Optional[int]): Maximum number of detections to plot.
+        """
+        # TODO: optimize this
+        for i, pred in enumerate(preds):
+            pred["batch_idx"] = torch.ones_like(pred["conf"]) * i  # add batch index to predictions
+        keys = preds[0].keys()
+        max_det = max_det or self.args.max_det
+        batched_preds = {k: torch.cat([x[k][:max_det] for x in preds], dim=0) for k in keys}
+        # TODO: fix this
+        batched_preds["bboxes"][:, :4] = ops.xyxy2xywh(batched_preds["bboxes"][:, :4])  # convert to xywh format
         plot_images(
-            batch["img"],
-            *output_to_target(preds, max_det=self.args.max_det),
+            images=batch["img"],
+            labels=batched_preds,
             paths=batch["im_file"],
             fname=self.save_dir / f"val_batch{ni}_pred.jpg",
             names=self.names,
             on_plot=self.on_plot,
         )  # pred
 
-    def save_one_txt(self, predn: torch.Tensor, save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
+    def save_one_txt(self, predn: Dict[str, torch.Tensor], save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
         """
         Save YOLO detections to a txt file in normalized coordinates in a specific format.
 
         Args:
-            predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
+            predn (Dict[str, torch.Tensor]): Dictionary containing predictions with keys 'bboxes', 'conf', and 'cls'.
             save_conf (bool): Whether to save confidence scores.
-            shape (Tuple[int, int]): Shape of the original image.
+            shape (Tuple[int, int]): Shape of the original image (height, width).
             file (Path): File path to save the detections.
         """
         from ultralytics.engine.results import Results
@@ -361,28 +358,29 @@
             np.zeros((shape[0], shape[1]), dtype=np.uint8),
             path=None,
             names=self.names,
-            boxes=predn[:, :6],
+            boxes=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
         ).save_txt(file, save_conf=save_conf)
 
-    def pred_to_json(self, predn: torch.Tensor, filename: str) -> None:
+    def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: str) -> None:
         """
         Serialize YOLO predictions to COCO json format.
 
         Args:
-            predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
+            predn (Dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys
+                with bounding box coordinates, confidence scores, and class predictions.
             filename (str): Image filename.
         """
         stem = Path(filename).stem
         image_id = int(stem) if stem.isnumeric() else stem
-        box = ops.xyxy2xywh(predn[:, :4])  # xywh
+        box = ops.xyxy2xywh(predn["bboxes"])  # xywh
        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-        for p, b in zip(predn.tolist(), box.tolist()):
+        for b, s, c in zip(box.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
             self.jdict.append(
                 {
                     "image_id": image_id,
-                    "category_id": self.class_map[int(p[5])],
+                    "category_id": self.class_map[int(c)],
                     "bbox": [round(x, 3) for x in b],
-                    "score": round(p[4], 5),
+                    "score": round(s, 5),
                 }
             )
 
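With predictions now carried as dicts, helpers like `save_one_txt` and `pred_to_json` read the `'bboxes'` (native-space xyxy), `'conf'`, and `'cls'` keys directly. A hand-built example of that structure; the values, filename, and `validator` instance are invented for illustration:

```python
import torch

predn = {
    "bboxes": torch.tensor([[10.0, 20.0, 110.0, 220.0]]),  # (N, 4) xyxy, native space
    "conf": torch.tensor([0.91]),  # (N,) confidence scores
    "cls": torch.tensor([0.0]),  # (N,) class indices
}
# validator.pred_to_json(predn, "example.jpg")  # `validator` assumed to exist
```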
@@ -3,12 +3,12 @@
 from pathlib import Path
 from typing import Any, Dict, List, Tuple, Union
 
+import numpy as np
 import torch
 
 from ultralytics.models.yolo.detect import DetectionValidator
 from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.metrics import OBBMetrics, batch_probiou
-from ultralytics.utils.plotting import output_to_rotated_target, plot_images
 
 
 class OBBValidator(DetectionValidator):
@@ -55,7 +55,7 @@ class OBBValidator(DetectionValidator):
         """
         super().__init__(dataloader, save_dir, args, _callbacks)
         self.args.task = "obb"
-        self.metrics = OBBMetrics(save_dir=self.save_dir, plot=True)
+        self.metrics = OBBMetrics()
 
     def init_metrics(self, model: torch.nn.Module) -> None:
         """
@@ -68,20 +68,20 @@ class OBBValidator(DetectionValidator):
         val = self.data.get(self.args.split, "")  # validation path
         self.is_dota = isinstance(val, str) and "DOTA" in val  # check if dataset is DOTA format
 
-    def _process_batch(self, detections: torch.Tensor, gt_bboxes: torch.Tensor, gt_cls: torch.Tensor) -> torch.Tensor:
+    def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, torch.Tensor]) -> Dict[str, np.ndarray]:
         """
         Compute the correct prediction matrix for a batch of detections and ground truth bounding boxes.
 
         Args:
-            detections (torch.Tensor): Detected bounding boxes and associated data with shape (N, 7) where each
-                detection is represented as (x1, y1, x2, y2, conf, class, angle).
-            gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (M, 5) where each box is represented
-                as (x1, y1, x2, y2, angle).
-            gt_cls (torch.Tensor): Class labels for the ground truth bounding boxes with shape (M,).
+            preds (Dict[str, torch.Tensor]): Prediction dictionary containing 'cls' and 'bboxes' keys with detected
+                class labels and bounding boxes.
+            batch (Dict[str, torch.Tensor]): Batch dictionary containing 'cls' and 'bboxes' keys with ground truth
+                class labels and bounding boxes.
 
         Returns:
-            (torch.Tensor): The correct prediction matrix with shape (N, 10), which includes 10 IoU levels for each
-                detection, indicating the accuracy of predictions compared to the ground truth.
+            (Dict[str, np.ndarray]): Dictionary containing 'tp' key with the correct prediction matrix as a numpy
+                array with shape (N, 10), which includes 10 IoU levels for each detection, indicating the accuracy
+                of predictions compared to the ground truth.
 
         Examples:
             >>> detections = torch.rand(100, 7)  # 100 sample detections
@@ -89,10 +89,25 @@ class OBBValidator(DetectionValidator):
             >>> gt_cls = torch.randint(0, 5, (50,))  # 50 ground truth class labels
             >>> correct_matrix = validator._process_batch(detections, gt_bboxes, gt_cls)
         """
-        iou = batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1))
-        return self.match_predictions(detections[:, 5], gt_cls, iou)
+        if len(batch["cls"]) == 0 or len(preds["cls"]) == 0:
+            return {"tp": np.zeros((len(preds["cls"]), self.niou), dtype=bool)}
+        iou = batch_probiou(batch["bboxes"], preds["bboxes"])
+        return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
 
-    def _prepare_batch(self, si: int, batch: Dict) -> Dict:
+    def postprocess(self, preds: torch.Tensor) -> List[Dict[str, torch.Tensor]]:
+        """
+        Args:
+            preds (torch.Tensor): Raw predictions from the model.
+
+        Returns:
+            (List[Dict[str, torch.Tensor]]): Processed predictions with angle information concatenated to bboxes.
+        """
+        preds = super().postprocess(preds)
+        for pred in preds:
+            pred["bboxes"] = torch.cat([pred["bboxes"], pred.pop("extra")], dim=-1)  # concatenate angle
+        return preds
+
+    def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
         Prepare batch data for OBB validation with proper scaling and formatting.
 
@@ -118,9 +133,9 @@
         if len(cls):
             bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]])  # target boxes
             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True)  # native-space labels
-        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
+        return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
 
-    def _prepare_pred(self, pred: torch.Tensor, pbatch: Dict[str, Any]) -> torch.Tensor:
+    def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
         """
         Prepare predictions by scaling bounding boxes to original image dimensions.
 
@@ -128,20 +143,22 @@
         input dimensions to the original image dimensions using the provided batch information.
 
         Args:
-            pred (torch.Tensor): Prediction tensor containing bounding box coordinates and other information.
+            pred (Dict[str, torch.Tensor]): Prediction dictionary containing bounding box coordinates and other information.
             pbatch (Dict[str, Any]): Dictionary containing batch information with keys:
                 - imgsz (tuple): Model input image size.
                 - ori_shape (tuple): Original image shape.
                 - ratio_pad (tuple): Ratio and padding information for scaling.
 
         Returns:
-            (torch.Tensor): Scaled prediction tensor with bounding boxes in original image dimensions.
+            (Dict[str, torch.Tensor]): Scaled prediction dictionary with bounding boxes in original image dimensions.
         """
-        predn = pred.clone()
-        ops.scale_boxes(
-            pbatch["imgsz"], predn[:, :4], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
+        cls = pred["cls"]
+        if self.args.single_cls:
+            cls *= 0
+        bboxes = ops.scale_boxes(
+            pbatch["imgsz"], pred["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
         )  # native-space pred
-        return predn
+        return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}
 
     def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int) -> None:
         """
@@ -158,22 +175,18 @@
             >>> preds = [torch.rand(10, 7)]  # Example predictions for one image
             >>> validator.plot_predictions(batch, preds, 0)
         """
-        plot_images(
-            batch["img"],
-            *output_to_rotated_target(preds, max_det=self.args.max_det),
-            paths=batch["im_file"],
-            fname=self.save_dir / f"val_batch{ni}_pred.jpg",
-            names=self.names,
-            on_plot=self.on_plot,
-        )  # pred
+        for p in preds:
+            # TODO: fix this duplicated `xywh2xyxy`
+            p["bboxes"][:, :4] = ops.xywh2xyxy(p["bboxes"][:, :4])  # convert to xyxy format for plotting
+        super().plot_predictions(batch, preds, ni)  # plot bboxes
 
-    def pred_to_json(self, predn: torch.Tensor, filename: Union[str, Path]) -> None:
+    def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: Union[str, Path]) -> None:
         """
         Convert YOLO predictions to COCO JSON format with rotated bounding box information.
 
         Args:
-            predn (torch.Tensor): Prediction tensor containing bounding box coordinates, confidence scores,
-                class predictions, and rotation angles with shape (N, 6+) where the last column is the angle.
+            predn (Dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', and 'cls' keys
+                with bounding box coordinates, confidence scores, and class predictions.
             filename (str | Path): Path to the image file for which predictions are being processed.
 
         Notes:
@@ -183,22 +196,20 @@
         """
         stem = Path(filename).stem
         image_id = int(stem) if stem.isnumeric() else stem
-        rbox = torch.cat([predn[:, :4], predn[:, -1:]], dim=-1)
+        rbox = predn["bboxes"]
         poly = ops.xywhr2xyxyxyxy(rbox).view(-1, 8)
-        for i, (r, b) in enumerate(zip(rbox.tolist(), poly.tolist())):
+        for r, b, s, c in zip(rbox.tolist(), poly.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
             self.jdict.append(
                 {
                     "image_id": image_id,
-                    "category_id": self.class_map[int(predn[i, 5].item())],
-                    "score": round(predn[i, 4].item(), 5),
+                    "category_id": self.class_map[int(c)],
+                    "score": round(s, 5),
                     "rbox": [round(x, 3) for x in r],
                     "poly": [round(x, 3) for x in b],
                 }
             )
 
-    def save_one_txt(
-        self, predn: torch.Tensor, save_conf: bool, shape: Tuple[int, int], file: Union[Path, str]
-    ) -> None:
+    def save_one_txt(self, predn: Dict[str, torch.Tensor], save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
         """
         """
         Save YOLO OBB detections to a text file in normalized coordinates.
         Save YOLO OBB detections to a text file in normalized coordinates.
 
 
@@ -207,7 +218,7 @@ class OBBValidator(DetectionValidator):
                 class predictions, and angles in format (x, y, w, h, conf, cls, angle).
                 class predictions, and angles in format (x, y, w, h, conf, cls, angle).
             save_conf (bool): Whether to save confidence scores in the text file.
             save_conf (bool): Whether to save confidence scores in the text file.
             shape (Tuple[int, int]): Original image shape in format (height, width).
             shape (Tuple[int, int]): Original image shape in format (height, width).
-            file (Path | str): Output file path to save detections.
+            file (Path): Output file path to save detections.
 
 
         Examples:
         Examples:
             >>> validator = OBBValidator()
             >>> validator = OBBValidator()
@@ -218,14 +229,11 @@ class OBBValidator(DetectionValidator):
 
 
         from ultralytics.engine.results import Results
         from ultralytics.engine.results import Results
 
 
-        rboxes = torch.cat([predn[:, :4], predn[:, -1:]], dim=-1)
-        # xywh, r, conf, cls
-        obb = torch.cat([rboxes, predn[:, 4:6]], dim=-1)
         Results(
         Results(
             np.zeros((shape[0], shape[1]), dtype=np.uint8),
             np.zeros((shape[0], shape[1]), dtype=np.uint8),
             path=None,
             path=None,
             names=self.names,
             names=self.names,
-            obb=obb,
+            obb=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
         ).save_txt(file, save_conf=save_conf)
         ).save_txt(file, save_conf=save_conf)
 
 
     def eval_json(self, stats: Dict[str, Any]) -> Dict[str, Any]:
     def eval_json(self, stats: Dict[str, Any]) -> Dict[str, Any]:
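Worth noting for reviewers: per-image OBB predictions now travel as a dict with `"bboxes"` holding `(x, y, w, h, angle)` rows plus separate `"conf"` and `"cls"` tensors, instead of one `(N, 7)` tensor. A minimal sketch of consuming that format the way `pred_to_json` does — the random tensors below are placeholders, not part of this PR:

```python
import torch

from ultralytics.utils import ops

# Hypothetical dict-based OBB predictions in the new format.
predn = {
    "bboxes": torch.rand(3, 5),  # (x, y, w, h, angle) per detection
    "conf": torch.rand(3),
    "cls": torch.zeros(3),
}

# Same conversion pred_to_json uses: rotated boxes -> 4-corner polygons.
poly = ops.xywhr2xyxyxyxy(predn["bboxes"]).view(-1, 8)
for r, p, s, c in zip(predn["bboxes"].tolist(), poly.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
    print({"category_id": int(c), "score": round(s, 5), "rbox": [round(x, 3) for x in r], "poly": [round(x, 3) for x in p]})
```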
@@ -7,7 +7,7 @@ from typing import Any, Dict, Optional, Union
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import PoseModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER
-from ultralytics.utils.plotting import plot_images, plot_results
+from ultralytics.utils.plotting import plot_results
 
 
 class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -108,40 +108,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
 
-    def plot_training_samples(self, batch: Dict[str, Any], ni: int):
-        """
-        Plot a batch of training samples with annotated class labels, bounding boxes, and keypoints.
-
-        Args:
-            batch (dict): Dictionary containing batch data with the following keys:
-                - img (torch.Tensor): Batch of images
-                - keypoints (torch.Tensor): Keypoints coordinates for pose estimation
-                - cls (torch.Tensor): Class labels
-                - bboxes (torch.Tensor): Bounding box coordinates
-                - im_file (list): List of image file paths
-                - batch_idx (torch.Tensor): Batch indices for each instance
-            ni (int): Current training iteration number used for filename
-
-        The function saves the plotted batch as an image in the trainer's save directory with the filename
-        'train_batch{ni}.jpg', where ni is the iteration number.
-        """
-        images = batch["img"]
-        kpts = batch["keypoints"]
-        cls = batch["cls"].squeeze(-1)
-        bboxes = batch["bboxes"]
-        paths = batch["im_file"]
-        batch_idx = batch["batch_idx"]
-        plot_images(
-            images,
-            batch_idx,
-            cls,
-            bboxes,
-            kpts=kpts,
-            paths=paths,
-            fname=self.save_dir / f"train_batch{ni}.jpg",
-            on_plot=self.on_plot,
-        )
-
     def plot_metrics(self):
         """Plot training/validation metrics."""
         plot_results(file=self.csv, pose=True, on_plot=self.on_plot)  # save results.png
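Since `PoseTrainer.plot_training_samples` is deleted here rather than moved, training-batch plotting presumably falls through to the shared base-class implementation, so end-user training code should be unchanged. A quick smoke test, assuming the stock `coco8-pose.yaml` demo dataset:

```python
from ultralytics import YOLO

# Train for one epoch and check that train_batch*.jpg previews still land in the run directory.
model = YOLO("yolo11n-pose.pt")
model.train(data="coco8-pose.yaml", epochs=1, imgsz=640)
```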
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Tuple
 
 import numpy as np
 import torch
@@ -9,8 +9,7 @@ import torch
 from ultralytics.models.yolo.detect import DetectionValidator
 from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.checks import check_requirements
-from ultralytics.utils.metrics import OKS_SIGMA, PoseMetrics, box_iou, kpt_iou
-from ultralytics.utils.plotting import output_to_target, plot_images
+from ultralytics.utils.metrics import OKS_SIGMA, PoseMetrics, kpt_iou
 
 
 class PoseValidator(DetectionValidator):
@@ -33,7 +32,6 @@ class PoseValidator(DetectionValidator):
         _prepare_batch: Prepare a batch for processing by converting keypoints to float and scaling to original
             dimensions.
         _prepare_pred: Prepare and scale keypoints in predictions for pose processing.
-        update_metrics: Update metrics with new predictions and ground truth data.
         _process_batch: Return correct prediction matrix by computing Intersection over Union (IoU) between
             detections and ground truth.
         plot_val_samples: Plot and save validation set samples with ground truth bounding boxes and keypoints.
@@ -77,7 +75,7 @@ class PoseValidator(DetectionValidator):
         self.sigma = None
         self.kpt_shape = None
         self.args.task = "pose"
-        self.metrics = PoseMetrics(save_dir=self.save_dir)
+        self.metrics = PoseMetrics()
         if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
             LOGGER.warning(
                 "Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
@@ -118,7 +116,36 @@ class PoseValidator(DetectionValidator):
         is_pose = self.kpt_shape == [17, 3]
         nkpt = self.kpt_shape[0]
         self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt
-        self.stats = dict(tp_p=[], tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
+
+    def postprocess(self, preds: torch.Tensor) -> List[Dict[str, torch.Tensor]]:
+        """
+        Postprocess YOLO predictions to extract and reshape keypoints for pose estimation.
+
+        This method extends the parent class postprocessing by extracting keypoints from the 'extra'
+        field of predictions and reshaping them according to the keypoint shape configuration.
+        The keypoints are reshaped from a flattened format to the proper dimensional structure
+        (typically [N, 17, 3] for COCO pose format).
+
+        Args:
+            preds (torch.Tensor): Raw prediction tensor from the YOLO pose model containing
+                bounding boxes, confidence scores, class predictions, and keypoint data.
+
+        Returns:
+            (List[Dict[str, torch.Tensor]]): List of processed prediction dictionaries, each containing:
+                - 'bboxes': Bounding box coordinates
+                - 'conf': Confidence scores
+                - 'cls': Class predictions
+                - 'keypoints': Reshaped keypoint coordinates with shape (-1, *self.kpt_shape)
+
+        Note:
+            The keypoints are extracted from the 'extra' field, which carries task-specific data
+            beyond basic detection. An empty 'extra' tensor simply reshapes to an empty keypoints
+            tensor, so images with no detections need no special handling.
+        """
+        preds = super().postprocess(preds)
+        for pred in preds:
+            pred["keypoints"] = pred.pop("extra").reshape(-1, *self.kpt_shape)  # remove extra if exists
+        return preds
 
     def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -142,10 +169,10 @@ class PoseValidator(DetectionValidator):
         kpts[..., 0] *= w
         kpts[..., 1] *= h
         kpts = ops.scale_coords(pbatch["imgsz"], kpts, pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"])
-        pbatch["kpts"] = kpts
+        pbatch["keypoints"] = kpts
         return pbatch
 
-    def _prepare_pred(self, pred: torch.Tensor, pbatch: Dict[str, Any]) -> Tuple[torch.Tensor, torch.Tensor]:
+    def _prepare_pred(self, pred: Dict[str, Any], pbatch: Dict[str, Any]) -> Dict[str, Any]:
         """
         Prepare and scale keypoints in predictions for pose processing.
 
@@ -154,189 +181,59 @@ class PoseValidator(DetectionValidator):
         to match the original image dimensions.
 
         Args:
-            pred (torch.Tensor): Raw prediction tensor from the model.
+            pred (Dict[str, torch.Tensor]): Post-processed predictions from the model.
             pbatch (Dict[str, Any]): Processed batch dictionary containing image information including:
                 - imgsz: Image size used for inference
                 - ori_shape: Original image shape
                 - ratio_pad: Ratio and padding information for coordinate scaling
 
         Returns:
-            predn (torch.Tensor): Processed prediction boxes scaled to original image dimensions.
-            pred_kpts (torch.Tensor): Predicted keypoints scaled to original image dimensions.
+            (Dict[str, Any]): Processed prediction dictionary with keypoints scaled to original image dimensions.
         """
         predn = super()._prepare_pred(pred, pbatch)
-        nk = pbatch["kpts"].shape[1]
-        pred_kpts = predn[:, 6:].view(len(predn), nk, -1)
-        ops.scale_coords(pbatch["imgsz"], pred_kpts, pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"])
-        return predn, pred_kpts
-
-    def update_metrics(self, preds: List[torch.Tensor], batch: Dict[str, Any]) -> None:
-        """
-        Update metrics with new predictions and ground truth data.
-
-        This method processes each prediction, compares it with ground truth, and updates various statistics
-        for performance evaluation.
+        predn["keypoints"] = ops.scale_coords(
+            pbatch["imgsz"], pred.get("keypoints").clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
+        )
+        return predn
 
-        Args:
-            preds (List[torch.Tensor]): List of prediction tensors from the model.
-            batch (Dict[str, Any]): Batch data containing images and ground truth annotations.
-        """
-        for si, pred in enumerate(preds):
-            self.seen += 1
-            npr = len(pred)
-            stat = dict(
-                conf=torch.zeros(0, device=self.device),
-                pred_cls=torch.zeros(0, device=self.device),
-                tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
-                tp_p=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
-            )
-            pbatch = self._prepare_batch(si, batch)
-            cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox")
-            nl = len(cls)
-            stat["target_cls"] = cls
-            stat["target_img"] = cls.unique()
-            if npr == 0:
-                if nl:
-                    for k in self.stats.keys():
-                        self.stats[k].append(stat[k])
-                    if self.args.plots:
-                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
-                continue
-
-            # Predictions
-            if self.args.single_cls:
-                pred[:, 5] = 0
-            predn, pred_kpts = self._prepare_pred(pred, pbatch)
-            stat["conf"] = predn[:, 4]
-            stat["pred_cls"] = predn[:, 5]
-
-            # Evaluate
-            if nl:
-                stat["tp"] = self._process_batch(predn, bbox, cls)
-                stat["tp_p"] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch["kpts"])
-            if self.args.plots:
-                self.confusion_matrix.process_batch(predn, bbox, cls)
-
-            for k in self.stats.keys():
-                self.stats[k].append(stat[k])
-
-            # Save
-            if self.args.save_json:
-                self.pred_to_json(predn, batch["im_file"][si])
-            if self.args.save_txt:
-                self.save_one_txt(
-                    predn,
-                    pred_kpts,
-                    self.args.save_conf,
-                    pbatch["ori_shape"],
-                    self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt",
-                )
-
-    def _process_batch(
-        self,
-        detections: torch.Tensor,
-        gt_bboxes: torch.Tensor,
-        gt_cls: torch.Tensor,
-        pred_kpts: Optional[torch.Tensor] = None,
-        gt_kpts: Optional[torch.Tensor] = None,
-    ) -> torch.Tensor:
+    def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, Any]) -> Dict[str, np.ndarray]:
         """
         """
         Return correct prediction matrix by computing Intersection over Union (IoU) between detections and ground truth.
         Return correct prediction matrix by computing Intersection over Union (IoU) between detections and ground truth.
 
 
         Args:
         Args:
-            detections (torch.Tensor): Tensor with shape (N, 6) representing detection boxes and scores, where each
-                detection is of the format (x1, y1, x2, y2, conf, class).
-            gt_bboxes (torch.Tensor): Tensor with shape (M, 4) representing ground truth bounding boxes, where each
-                box is of the format (x1, y1, x2, y2).
-            gt_cls (torch.Tensor): Tensor with shape (M,) representing ground truth class indices.
-            pred_kpts (torch.Tensor, optional): Tensor with shape (N, 51) representing predicted keypoints, where
-                51 corresponds to 17 keypoints each having 3 values.
-            gt_kpts (torch.Tensor, optional): Tensor with shape (N, 51) representing ground truth keypoints.
+            preds (Dict[str, torch.Tensor]): Dictionary containing prediction data with keys 'cls' for class predictions
+                and 'keypoints' for keypoint predictions.
+            batch (Dict[str, Any]): Dictionary containing ground truth data with keys 'cls' for class labels,
+                'bboxes' for bounding boxes, and 'keypoints' for keypoint annotations.
 
         Returns:
-            (torch.Tensor): A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels,
-                where N is the number of detections.
+            (Dict[str, np.ndarray]): Dictionary containing the correct prediction matrix including 'tp_p' for pose
+                true positives across 10 IoU levels.
 
         Notes:
             `0.53` scale factor used in area computation is referenced from
             https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384.
         """
-        if pred_kpts is not None and gt_kpts is not None:
+        tp = super()._process_batch(preds, batch)
+        gt_cls = batch["cls"]
+        if len(gt_cls) == 0 or len(preds["cls"]) == 0:
+            tp_p = np.zeros((len(preds["cls"]), self.niou), dtype=bool)
+        else:
             # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
-            area = ops.xyxy2xywh(gt_bboxes)[:, 2:].prod(1) * 0.53
-            iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area)
-        else:  # boxes
-            iou = box_iou(gt_bboxes, detections[:, :4])
+            area = ops.xyxy2xywh(batch["bboxes"])[:, 2:].prod(1) * 0.53
+            iou = kpt_iou(batch["keypoints"], preds["keypoints"], sigma=self.sigma, area=area)
+            tp_p = self.match_predictions(preds["cls"], gt_cls, iou).cpu().numpy()
+        tp.update({"tp_p": tp_p})  # update tp with kpts IoU
+        return tp
 
-        return self.match_predictions(detections[:, 5], gt_cls, iou)
-
-    def plot_val_samples(self, batch: Dict[str, Any], ni: int) -> None:
-        """
-        Plot and save validation set samples with ground truth bounding boxes and keypoints.
-
-        Args:
-            batch (Dict[str, Any]): Dictionary containing batch data with keys:
-                - img (torch.Tensor): Batch of images
-                - batch_idx (torch.Tensor): Batch indices for each image
-                - cls (torch.Tensor): Class labels
-                - bboxes (torch.Tensor): Bounding box coordinates
-                - keypoints (torch.Tensor): Keypoint coordinates
-                - im_file (list): List of image file paths
-            ni (int): Batch index used for naming the output file
-        """
-        plot_images(
-            batch["img"],
-            batch["batch_idx"],
-            batch["cls"].squeeze(-1),
-            batch["bboxes"],
-            kpts=batch["keypoints"],
-            paths=batch["im_file"],
-            fname=self.save_dir / f"val_batch{ni}_labels.jpg",
-            names=self.names,
-            on_plot=self.on_plot,
-        )
-
-    def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int) -> None:
-        """
-        Plot and save model predictions with bounding boxes and keypoints.
-
-        Args:
-            batch (Dict[str, Any]): Dictionary containing batch data including images, file paths, and other metadata.
-            preds (List[torch.Tensor]): List of prediction tensors from the model, each containing bounding boxes,
-                confidence scores, class predictions, and keypoints.
-            ni (int): Batch index used for naming the output file.
-
-        The function extracts keypoints from predictions, converts predictions to target format, and plots them
-        on the input images. The resulting visualization is saved to the specified save directory.
-        """
-        pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape) for p in preds], 0)
-        plot_images(
-            batch["img"],
-            *output_to_target(preds, max_det=self.args.max_det),
-            kpts=pred_kpts,
-            paths=batch["im_file"],
-            fname=self.save_dir / f"val_batch{ni}_pred.jpg",
-            names=self.names,
-            on_plot=self.on_plot,
-        )  # pred
-
-    def save_one_txt(
-        self,
-        predn: torch.Tensor,
-        pred_kpts: torch.Tensor,
-        save_conf: bool,
-        shape: Tuple[int, int],
-        file: Path,
-    ) -> None:
+    def save_one_txt(self, predn: Dict[str, torch.Tensor], save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
         """
         """
         Save YOLO pose detections to a text file in normalized coordinates.
         Save YOLO pose detections to a text file in normalized coordinates.
 
 
         Args:
         Args:
-            predn (torch.Tensor): Prediction boxes and scores with shape (N, 6) for (x1, y1, x2, y2, conf, cls).
-            pred_kpts (torch.Tensor): Predicted keypoints with shape (N, K, D) where K is the number of keypoints
-                and D is the dimension (typically 3 for x, y, visibility).
+            predn (Dict[str, torch.Tensor]): Dictionary containing predictions with keys 'bboxes', 'conf', 'cls' and 'keypoints.
             save_conf (bool): Whether to save confidence scores.
             save_conf (bool): Whether to save confidence scores.
-            shape (tuple): Original image shape (height, width).
+            shape (Tuple[int, int]): Shape of the original image (height, width).
             file (Path): Output file path to save detections.
             file (Path): Output file path to save detections.
 
 
         Notes:
         Notes:
@@ -349,11 +246,11 @@ class PoseValidator(DetectionValidator):
             np.zeros((shape[0], shape[1]), dtype=np.uint8),
             path=None,
             names=self.names,
-            boxes=predn[:, :6],
-            keypoints=pred_kpts,
+            boxes=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
+            keypoints=predn["keypoints"],
         ).save_txt(file, save_conf=save_conf)
 
-    def pred_to_json(self, predn: torch.Tensor, filename: str) -> None:
+    def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: str) -> None:
         """
         """
         Convert YOLO predictions to COCO JSON format.
         Convert YOLO predictions to COCO JSON format.
 
 
@@ -361,10 +258,9 @@ class PoseValidator(DetectionValidator):
         to COCO format, and appends the results to the internal JSON dictionary (self.jdict).
 
         Args:
-            predn (torch.Tensor): Prediction tensor containing bounding boxes, confidence scores, class IDs,
-                and keypoints, with shape (N, 6+K) where N is the number of predictions and K is the flattened
-                keypoints dimension.
-            filename (str | Path): Path to the image file for which predictions are being processed.
+            predn (Dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', 'cls',
+                and 'keypoints' tensors.
+            filename (str): Path to the image file for which predictions are being processed.
 
         Notes:
             The method extracts the image ID from the filename stem (either as an integer if numeric, or as a string),
@@ -373,16 +269,21 @@ class PoseValidator(DetectionValidator):
         """
         """
         stem = Path(filename).stem
         stem = Path(filename).stem
         image_id = int(stem) if stem.isnumeric() else stem
         image_id = int(stem) if stem.isnumeric() else stem
-        box = ops.xyxy2xywh(predn[:, :4])  # xywh
+        box = ops.xyxy2xywh(predn["bboxes"])  # xywh
         box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
         box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-        for p, b in zip(predn.tolist(), box.tolist()):
+        for b, s, c, k in zip(
+            box.tolist(),
+            predn["conf"].tolist(),
+            predn["cls"].tolist(),
+            predn["keypoints"].flatten(1, 2).tolist(),
+        ):
             self.jdict.append(
             self.jdict.append(
                 {
                 {
                     "image_id": image_id,
                     "image_id": image_id,
-                    "category_id": self.class_map[int(p[5])],
+                    "category_id": self.class_map[int(c)],
                     "bbox": [round(x, 3) for x in b],
                     "bbox": [round(x, 3) for x in b],
-                    "keypoints": p[6:],
-                    "score": round(p[4], 5),
+                    "keypoints": k,
+                    "score": round(s, 5),
                 }
                 }
             )
             )
 
 
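The pose `_process_batch` now just layers keypoint true positives (`tp_p`) on top of the box-level `tp` dict returned by the parent class. A self-contained sketch of the OKS computation it delegates to `kpt_iou`, with random tensors standing in for real predictions (shapes follow the COCO 17-keypoint layout):

```python
import torch

from ultralytics.utils import ops
from ultralytics.utils.metrics import OKS_SIGMA, kpt_iou

gt_kpts = torch.rand(4, 17, 3)  # placeholder ground truth: (x, y, visibility) per keypoint
pred_kpts = torch.rand(5, 17, 3)  # placeholder predictions
gt_boxes = torch.tensor([[0.0, 0.0, 1.0, 1.0]]).repeat(4, 1)  # xyxy boxes of the GT objects

# The 0.53 area scale factor mirrors xtcocotools' cocoeval, as noted in the diff.
area = ops.xyxy2xywh(gt_boxes)[:, 2:].prod(1) * 0.53
iou = kpt_iou(gt_kpts, pred_kpts, sigma=OKS_SIGMA, area=area)
print(iou.shape)  # (4, 5) OKS matrix: ground truths x predictions
```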
@@ -7,7 +7,7 @@ from typing import Dict, Optional, Union
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import SegmentationModel
 from ultralytics.utils import DEFAULT_CFG, RANK
-from ultralytics.utils.plotting import plot_images, plot_results
+from ultralytics.utils.plotting import plot_results
 
 
 class SegmentationTrainer(yolo.detect.DetectionTrainer):
@@ -82,46 +82,6 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
 
-    def plot_training_samples(self, batch: Dict, ni: int):
-        """
-        Plot training sample images with labels, bounding boxes, and masks.
-
-        This method creates a visualization of training batch images with their corresponding labels, bounding boxes,
-        and segmentation masks, saving the result to a file for inspection and debugging.
-
-        Args:
-            batch (dict): Dictionary containing batch data with the following keys:
-                'img': Images tensor
-                'batch_idx': Batch indices for each box
-                'cls': Class labels tensor (squeezed to remove last dimension)
-                'bboxes': Bounding box coordinates tensor
-                'masks': Segmentation masks tensor
-                'im_file': List of image file paths
-            ni (int): Current training iteration number, used for naming the output file.
-
-        Examples:
-            >>> trainer = SegmentationTrainer()
-            >>> batch = {
-            ...     "img": torch.rand(16, 3, 640, 640),
-            ...     "batch_idx": torch.zeros(16),
-            ...     "cls": torch.randint(0, 80, (16, 1)),
-            ...     "bboxes": torch.rand(16, 4),
-            ...     "masks": torch.rand(16, 640, 640),
-            ...     "im_file": ["image1.jpg", "image2.jpg"],
-            ... }
-            >>> trainer.plot_training_samples(batch, ni=5)
-        """
-        plot_images(
-            batch["img"],
-            batch["batch_idx"],
-            batch["cls"].squeeze(-1),
-            batch["bboxes"],
-            masks=batch["masks"],
-            paths=batch["im_file"],
-            fname=self.save_dir / f"train_batch{ni}.jpg",
-            on_plot=self.on_plot,
-        )
-
     def plot_metrics(self):
         """Plot training/validation metrics."""
         plot_results(file=self.csv, segment=True, on_plot=self.on_plot)  # save results.png
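As with the pose trainer above, the segment-specific `plot_training_samples` override is removed with no replacement in this file, so the base-class plotting path presumably covers masks now. An end-to-end sanity check of the refactored validator (see the next file diff), assuming the stock `coco8-seg.yaml` demo dataset:

```python
from ultralytics import YOLO

model = YOLO("yolo11n-seg.pt")
metrics = model.val(data="coco8-seg.yaml", imgsz=640)  # exercises SegmentationValidator below
print(metrics.box.map, metrics.seg.map)  # box and mask mAP50-95
```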
@@ -2,7 +2,7 @@
 
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Tuple
 
 import numpy as np
 import torch
@@ -11,8 +11,7 @@ import torch.nn.functional as F
 from ultralytics.models.yolo.detect import DetectionValidator
 from ultralytics.utils import LOGGER, NUM_THREADS, ops
 from ultralytics.utils.checks import check_requirements
-from ultralytics.utils.metrics import SegmentMetrics, box_iou, mask_iou
-from ultralytics.utils.plotting import output_to_target, plot_images
+from ultralytics.utils.metrics import SegmentMetrics, mask_iou
 
 
 class SegmentationValidator(DetectionValidator):
@@ -47,10 +46,9 @@ class SegmentationValidator(DetectionValidator):
             _callbacks (list, optional): List of callback functions.
         """
         super().__init__(dataloader, save_dir, args, _callbacks)
-        self.plot_masks = None
         self.process = None
         self.args.task = "segment"
-        self.metrics = SegmentMetrics(save_dir=self.save_dir)
+        self.metrics = SegmentMetrics()
 
     def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -74,12 +72,10 @@ class SegmentationValidator(DetectionValidator):
             model (torch.nn.Module): Model to validate.
         """
         super().init_metrics(model)
-        self.plot_masks = []
         if self.args.save_json:
             check_requirements("pycocotools>=2.0.6")
         # More accurate vs faster
         self.process = ops.process_mask_native if self.args.save_json or self.args.save_txt else ops.process_mask
-        self.stats = dict(tp_m=[], tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
 
     def get_desc(self) -> str:
         """Return a formatted description of evaluation metrics."""
@@ -97,7 +93,7 @@ class SegmentationValidator(DetectionValidator):
             "mAP50-95)",
             "mAP50-95)",
         )
         )
 
 
-    def postprocess(self, preds: List[torch.Tensor]) -> Tuple[List[torch.Tensor], torch.Tensor]:
+    def postprocess(self, preds: List[torch.Tensor]) -> List[Dict[str, torch.Tensor]]:
         """
         """
         Post-process YOLO predictions and return output detections with proto.
         Post-process YOLO predictions and return output detections with proto.
 
 
@@ -105,12 +101,19 @@ class SegmentationValidator(DetectionValidator):
             preds (List[torch.Tensor]): Raw predictions from the model.
 
         Returns:
-            p (List[torch.Tensor]): Processed detection predictions.
-            proto (torch.Tensor): Prototype masks for segmentation.
+            (List[Dict[str, torch.Tensor]]): Processed detection predictions with masks.
         """
-        p = super().postprocess(preds[0])
         proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
-        return p, proto
+        preds = super().postprocess(preds[0])
+        imgsz = [4 * x for x in proto.shape[2:]]  # get image size from proto
+        for i, pred in enumerate(preds):
+            coefficient = pred.pop("extra")
+            pred["masks"] = (
+                self.process(proto[i], coefficient, pred["bboxes"], shape=imgsz)
+                if len(coefficient)
+                else torch.zeros((0, imgsz[0], imgsz[1]), dtype=torch.uint8, device=pred["bboxes"].device)
+            )
+        return preds
 
     def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -128,142 +131,56 @@ class SegmentationValidator(DetectionValidator):
         prepared_batch["masks"] = batch["masks"][midx]
         prepared_batch["masks"] = batch["masks"][midx]
         return prepared_batch
         return prepared_batch
 
 
-    def _prepare_pred(
-        self, pred: torch.Tensor, pbatch: Dict[str, Any], proto: torch.Tensor
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
         """
         """
         Prepare predictions for evaluation by processing bounding boxes and masks.
         Prepare predictions for evaluation by processing bounding boxes and masks.
 
 
         Args:
         Args:
-            pred (torch.Tensor): Raw predictions from the model.
+            pred (Dict[str, torch.Tensor]): Post-processed predictions from the model.
             pbatch (Dict[str, Any]): Prepared batch information.
             pbatch (Dict[str, Any]): Prepared batch information.
-            proto (torch.Tensor): Prototype masks for segmentation.
 
         Returns:
-            predn (torch.Tensor): Processed bounding box predictions.
-            pred_masks (torch.Tensor): Processed mask predictions.
+            (Dict[str, torch.Tensor]): Processed predictions with scaled bounding boxes and masks.
         """
         predn = super()._prepare_pred(pred, pbatch)
-        pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=pbatch["imgsz"])
-        return predn, pred_masks
-
-    def update_metrics(self, preds: Tuple[List[torch.Tensor], torch.Tensor], batch: Dict[str, Any]) -> None:
-        """
-        Update metrics with the current batch predictions and targets.
-
-        Args:
-            preds (Tuple[List[torch.Tensor], torch.Tensor]): List of predictions from the model.
-            batch (Dict[str, Any]): Batch data containing ground truth.
-        """
-        for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
-            self.seen += 1
-            npr = len(pred)
-            stat = dict(
-                conf=torch.zeros(0, device=self.device),
-                pred_cls=torch.zeros(0, device=self.device),
-                tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
-                tp_m=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
+        predn["masks"] = pred["masks"]
+        if self.args.save_json and len(predn["masks"]):
+            coco_masks = torch.as_tensor(pred["masks"], dtype=torch.uint8)
+            coco_masks = ops.scale_image(
+                coco_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
+                pbatch["ori_shape"],
+                ratio_pad=pbatch["ratio_pad"],
             )
-            pbatch = self._prepare_batch(si, batch)
-            cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox")
-            nl = len(cls)
-            stat["target_cls"] = cls
-            stat["target_img"] = cls.unique()
-            if npr == 0:
-                if nl:
-                    for k in self.stats.keys():
-                        self.stats[k].append(stat[k])
-                    if self.args.plots:
-                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
-                continue
-
-            # Masks
-            gt_masks = pbatch.pop("masks")
-            # Predictions
-            if self.args.single_cls:
-                pred[:, 5] = 0
-            predn, pred_masks = self._prepare_pred(pred, pbatch, proto)
-            stat["conf"] = predn[:, 4]
-            stat["pred_cls"] = predn[:, 5]
-
-            # Evaluate
-            if nl:
-                stat["tp"] = self._process_batch(predn, bbox, cls)
-                stat["tp_m"] = self._process_batch(
-                    predn, bbox, cls, pred_masks, gt_masks, self.args.overlap_mask, masks=True
-                )
-            if self.args.plots:
-                self.confusion_matrix.process_batch(predn, bbox, cls)
-
-            for k in self.stats.keys():
-                self.stats[k].append(stat[k])
-
-            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
-            if self.args.plots and self.batch_i < 3:
-                self.plot_masks.append(pred_masks[:50].cpu())  # Limit plotted items for speed
-                if pred_masks.shape[0] > 50:
-                    LOGGER.warning("Limiting validation plots to first 50 items per image for speed...")
-
-            # Save
-            if self.args.save_json:
-                self.pred_to_json(
-                    predn,
-                    batch["im_file"][si],
-                    ops.scale_image(
-                        pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
-                        pbatch["ori_shape"],
-                        ratio_pad=batch["ratio_pad"][si],
-                    ),
-                )
-            if self.args.save_txt:
-                self.save_one_txt(
-                    predn,
-                    pred_masks,
-                    self.args.save_conf,
-                    pbatch["ori_shape"],
-                    self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt",
-                )
-
-    def _process_batch(
-        self,
-        detections: torch.Tensor,
-        gt_bboxes: torch.Tensor,
-        gt_cls: torch.Tensor,
-        pred_masks: Optional[torch.Tensor] = None,
-        gt_masks: Optional[torch.Tensor] = None,
-        overlap: Optional[bool] = False,
-        masks: Optional[bool] = False,
-    ) -> torch.Tensor:
+            predn["coco_masks"] = coco_masks
+        return predn
+
+    def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, Any]) -> Dict[str, np.ndarray]:
         """
         """
         Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
         Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
 
 
         Args:
         Args:
-            detections (torch.Tensor): Tensor of shape (N, 6) representing detected bounding boxes and
-                associated confidence scores and class indices. Each row is of the format [x1, y1, x2, y2, conf, class].
-            gt_bboxes (torch.Tensor): Tensor of shape (M, 4) representing ground truth bounding box coordinates.
-                Each row is of the format [x1, y1, x2, y2].
-            gt_cls (torch.Tensor): Tensor of shape (M,) representing ground truth class indices.
-            pred_masks (torch.Tensor, optional): Tensor representing predicted masks, if available. The shape should
-                match the ground truth masks.
-            gt_masks (torch.Tensor, optional): Tensor of shape (M, H, W) representing ground truth masks, if available.
-            overlap (bool, optional): Flag indicating if overlapping masks should be considered.
-            masks (bool, optional): Flag indicating if the batch contains mask data.
+            preds (Dict[str, torch.Tensor]): Dictionary containing predictions with keys like 'cls' and 'masks'.
+            batch (Dict[str, Any]): Dictionary containing batch data with keys like 'cls' and 'masks'.
 
 
         Returns:
         Returns:
-            (torch.Tensor): A correct prediction matrix of shape (N, 10), where 10 represents different IoU levels.
+            (Dict[str, np.ndarray]): A dictionary containing correct prediction matrices including 'tp_m' for mask IoU.
 
         Notes:
             - If `masks` is True, the function computes IoU between predicted and ground truth masks.
             - If `overlap` is True and `masks` is True, overlapping masks are taken into account when computing IoU.
 
         Examples:
-            >>> detections = torch.tensor([[25, 30, 200, 300, 0.8, 1], [50, 60, 180, 290, 0.75, 0]])
-            >>> gt_bboxes = torch.tensor([[24, 29, 199, 299], [55, 65, 185, 295]])
-            >>> gt_cls = torch.tensor([1, 0])
-            >>> correct_preds = validator._process_batch(detections, gt_bboxes, gt_cls)
+            >>> preds = {"cls": torch.tensor([1, 0]), "masks": torch.rand(2, 640, 640), "bboxes": torch.rand(2, 4)}
+            >>> batch = {"cls": torch.tensor([1, 0]), "masks": torch.rand(2, 640, 640), "bboxes": torch.rand(2, 4)}
+            >>> correct_preds = validator._process_batch(preds, batch)
         """
         """
-        if masks:
-            if overlap:
+        tp = super()._process_batch(preds, batch)
+        gt_cls, gt_masks = batch["cls"], batch["masks"]
+        if len(gt_cls) == 0 or len(preds["cls"]) == 0:
+            tp_m = np.zeros((len(preds["cls"]), self.niou), dtype=bool)
+        else:
+            pred_masks = preds["masks"]
+            if self.args.overlap_mask:
                 nl = len(gt_cls)
                 index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
                 gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
@@ -272,60 +189,32 @@ class SegmentationValidator(DetectionValidator):
                 gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
                 gt_masks = gt_masks.gt_(0.5)
             iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
-        else:  # boxes
-            iou = box_iou(gt_bboxes, detections[:, :4])
-
-        return self.match_predictions(detections[:, 5], gt_cls, iou)
+            tp_m = self.match_predictions(preds["cls"], gt_cls, iou).cpu().numpy()
+        tp.update({"tp_m": tp_m})  # update tp with mask IoU
+        return tp
 
-    def plot_val_samples(self, batch: Dict[str, Any], ni: int) -> None:
-        """
-        Plot validation samples with bounding box labels and masks.
-
-        Args:
-            batch (Dict[str, Any]): Batch containing images and annotations.
-            ni (int): Batch index.
-        """
-        plot_images(
-            batch["img"],
-            batch["batch_idx"],
-            batch["cls"].squeeze(-1),
-            batch["bboxes"],
-            masks=batch["masks"],
-            paths=batch["im_file"],
-            fname=self.save_dir / f"val_batch{ni}_labels.jpg",
-            names=self.names,
-            on_plot=self.on_plot,
-        )
-
-    def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int) -> None:
+    def plot_predictions(self, batch: Dict[str, Any], preds: List[Dict[str, torch.Tensor]], ni: int) -> None:
         """
         """
         Plot batch predictions with masks and bounding boxes.
         Plot batch predictions with masks and bounding boxes.
 
 
         Args:
         Args:
             batch (Dict[str, Any]): Batch containing images and annotations.
             batch (Dict[str, Any]): Batch containing images and annotations.
-            preds (List[torch.Tensor]): List of predictions from the model.
+            preds (List[Dict[str, torch.Tensor]]): List of predictions from the model.
             ni (int): Batch index.
         """
-        plot_images(
-            batch["img"],
-            *output_to_target(preds[0], max_det=50),  # not set to self.args.max_det due to slow plotting speed
-            torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks,
-            paths=batch["im_file"],
-            fname=self.save_dir / f"val_batch{ni}_pred.jpg",
-            names=self.names,
-            on_plot=self.on_plot,
-        )  # pred
-        self.plot_masks.clear()
-
-    def save_one_txt(
-        self, predn: torch.Tensor, pred_masks: torch.Tensor, save_conf: bool, shape: Tuple[int, int], file: Path
-    ) -> None:
+        for p in preds:
+            masks = p["masks"]
+            if masks.shape[0] > 50:
+                LOGGER.warning("Limiting validation plots to first 50 items per image for speed...")
+            p["masks"] = torch.as_tensor(masks[:50], dtype=torch.uint8).cpu()
+        super().plot_predictions(batch, preds, ni, max_det=50)  # plot bboxes
+
+    def save_one_txt(self, predn: Dict[str, torch.Tensor], save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
         """
         """
         Save YOLO detections to a txt file in normalized coordinates in a specific format.
         Save YOLO detections to a txt file in normalized coordinates in a specific format.
 
 
         Args:
         Args:
             predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
             predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
-            pred_masks (torch.Tensor): Predicted masks.
             save_conf (bool): Whether to save confidence scores.
             shape (Tuple[int, int]): Shape of the original image.
             file (Path): File path to save the detections.
@@ -336,18 +225,17 @@ class SegmentationValidator(DetectionValidator):
             np.zeros((shape[0], shape[1]), dtype=np.uint8),
             path=None,
             names=self.names,
-            boxes=predn[:, :6],
-            masks=pred_masks,
+            boxes=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
+            masks=torch.as_tensor(predn["masks"], dtype=torch.uint8),
         ).save_txt(file, save_conf=save_conf)
 
-    def pred_to_json(self, predn: torch.Tensor, filename: str, pred_masks: torch.Tensor) -> None:
+    def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: str) -> None:
         """
         """
         Save one JSON result for COCO evaluation.
         Save one JSON result for COCO evaluation.
 
 
         Args:
         Args:
-            predn (torch.Tensor): Predictions in the format [x1, y1, x2, y2, conf, cls].
+            predn (Dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
             filename (str): Image filename.
-            pred_masks (numpy.ndarray): Predicted masks.
 
         Examples:
              >>> result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
@@ -362,18 +250,18 @@
 
         stem = Path(filename).stem
         image_id = int(stem) if stem.isnumeric() else stem
-        box = ops.xyxy2xywh(predn[:, :4])  # xywh
+        box = ops.xyxy2xywh(predn["bboxes"])  # xywh
         box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-        pred_masks = np.transpose(pred_masks, (2, 0, 1))
+        pred_masks = np.transpose(predn["coco_masks"], (2, 0, 1))
         with ThreadPool(NUM_THREADS) as pool:
             rles = pool.map(single_encode, pred_masks)
-        for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
+        for i, (b, s, c) in enumerate(zip(box.tolist(), predn["conf"].tolist(), predn["cls"].tolist())):
             self.jdict.append(
                 {
                     "image_id": image_id,
-                    "category_id": self.class_map[int(p[5])],
+                    "category_id": self.class_map[int(c)],
                     "bbox": [round(x, 3) for x in b],
                     "bbox": [round(x, 3) for x in b],
-                    "score": round(p[4], 5),
+                    "score": round(s, 5),
                     "segmentation": rles[i],
                     "segmentation": rles[i],
                 }
                 }
             )
             )
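Segmentation `_process_batch` follows the same pattern as pose: the parent supplies box `tp`, and mask true positives (`tp_m`) are added from `mask_iou` over flattened masks. A standalone sketch with random binary masks (sizes are placeholders):

```python
import torch

from ultralytics.utils.metrics import mask_iou

gt_masks = (torch.rand(3, 160, 160) > 0.5).float()  # placeholder ground-truth masks
pred_masks = (torch.rand(5, 160, 160) > 0.5).float()  # placeholder predicted masks

# mask_iou takes flattened (N, H*W) masks and returns an (N, M) IoU matrix.
iou = mask_iou(gt_masks.view(3, -1), pred_masks.view(5, -1))
print(iou.shape)  # torch.Size([3, 5])
```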
@@ -457,7 +457,7 @@ def _log_plots(experiment, trainer) -> None:
         >>> _log_plots(experiment, trainer)
     """
     plot_filenames = None
-    if isinstance(trainer.validator.metrics, SegmentMetrics) and trainer.validator.metrics.task == "segment":
+    if isinstance(trainer.validator.metrics, SegmentMetrics):
         plot_filenames = [
             trainer.save_dir / f"{prefix}{plots}.png"
             for plots in EVALUATION_PLOT_NAMES
@@ -4,7 +4,7 @@
 import math
 import warnings
 from pathlib import Path
-from typing import Dict, List, Tuple, Union
+from typing import Any, Dict, List, Tuple, Union
 
 import numpy as np
 import torch
@@ -316,28 +316,22 @@ class ConfusionMatrix(DataExportMixin):
     Attributes:
         task (str): The type of task, either 'detect' or 'classify'.
         matrix (np.ndarray): The confusion matrix, with dimensions depending on the task.
-        nc (int): The number of classes.
-        conf (float): The confidence threshold for detections.
-        iou_thres (float): The Intersection over Union threshold.
+        nc (int): The number of classes.
+        names (List[str]): The names of the classes, used as labels on the plot.
     """
     """
 
 
-    def __init__(self, nc: int, conf: float = 0.25, iou_thres: float = 0.45, names: tuple = (), task: str = "detect"):
+    def __init__(self, names: List[str] = [], task: str = "detect"):
         """
         """
         Initialize a ConfusionMatrix instance.
         Initialize a ConfusionMatrix instance.
 
 
         Args:
         Args:
-            nc (int): Number of classes.
-            conf (float, optional): Confidence threshold for detections.
-            iou_thres (float, optional): IoU threshold for matching detections to ground truth.
-            names (tuple, optional): Names of classes, used as labels on the plot.
+            names (List[str], optional): Names of classes, used as labels on the plot.
             task (str, optional): Type of task, either 'detect' or 'classify'.
             task (str, optional): Type of task, either 'detect' or 'classify'.
         """
         """
         self.task = task
         self.task = task
-        self.matrix = np.zeros((nc + 1, nc + 1)) if self.task == "detect" else np.zeros((nc, nc))
-        self.nc = nc  # number of classes
-        self.names = list(names)  # name of classes
-        self.conf = 0.25 if conf in {None, 0.001} else conf  # apply 0.25 if default val conf is passed
-        self.iou_thres = iou_thres
+        self.nc = len(names)  # number of classes
+        self.matrix = np.zeros((self.nc + 1, self.nc + 1)) if self.task == "detect" else np.zeros((self.nc, self.nc))
+        self.names = names  # name of classes
 
 
     def process_cls_preds(self, preds, targets):
     def process_cls_preds(self, preds, targets):
         """
         """
@@ -351,41 +345,45 @@ class ConfusionMatrix(DataExportMixin):
         for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
             self.matrix[p][t] += 1
 
-    def process_batch(self, detections, gt_bboxes, gt_cls):
+    def process_batch(
+        self, detections: Dict[str, torch.Tensor], batch: Dict[str, Any], conf: float = 0.25, iou_thres: float = 0.45
+    ) -> None:
         """
         Update confusion matrix for object detection task.
 
         Args:
-            detections (Array[N, 6] | Array[N, 7]): Detected bounding boxes and their associated information.
-                                      Each row should contain (x1, y1, x2, y2, conf, class)
-                                      or with an additional element `angle` when it's obb.
-            gt_bboxes (Array[M, 4]| Array[N, 5]): Ground truth bounding boxes with xyxy/xyxyr format.
-            gt_cls (Array[M]): The class labels.
+            detections (Dict[str, torch.Tensor]): Dictionary containing detected bounding boxes and their associated information.
+                                       Should contain 'cls', 'conf', and 'bboxes' keys, where 'bboxes' can be
+                                       Array[N, 4] for regular boxes or Array[N, 5] for OBB with angle.
+            batch (Dict[str, Any]): Batch dictionary containing ground truth data with 'bboxes' (Array[M, 4] | Array[M, 5]) and
+                'cls' (Array[M]) keys, where M is the number of ground truth objects.
+            conf (float, optional): Confidence threshold for detections.
+            iou_thres (float, optional): IoU threshold for matching detections to ground truth.
         """
+        conf = 0.25 if conf in {None, 0.001} else conf  # apply 0.25 if default val conf is passed
+        gt_cls, gt_bboxes = batch["cls"], batch["bboxes"]
+        no_pred = len(detections["cls"]) == 0
         if gt_cls.shape[0] == 0:  # Check if labels is empty
-            if detections is not None:
-                detections = detections[detections[:, 4] > self.conf]
-                detection_classes = detections[:, 5].int().tolist()
+            if not no_pred:
+                detections = {k: detections[k][detections["conf"] > conf] for k in {"cls", "bboxes"}}
+                detection_classes = detections["cls"].int().tolist()
                 for dc in detection_classes:
                     self.matrix[dc, self.nc] += 1  # false positives
             return
-        if detections is None:
+        if no_pred:
             gt_classes = gt_cls.int().tolist()
             for gc in gt_classes:
                 self.matrix[self.nc, gc] += 1  # background FN
             return
 
-        detections = detections[detections[:, 4] > self.conf]
+        detections = {k: detections[k][detections["conf"] > conf] for k in {"cls", "bboxes"}}
         gt_classes = gt_cls.int().tolist()
-        detection_classes = detections[:, 5].int().tolist()
-        is_obb = detections.shape[1] == 7 and gt_bboxes.shape[1] == 5  # with additional `angle` dimension
-        iou = (
-            batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1))
-            if is_obb
-            else box_iou(gt_bboxes, detections[:, :4])
-        )
-
-        x = torch.where(iou > self.iou_thres)
+        detection_classes = detections["cls"].int().tolist()
+        bboxes = detections["bboxes"]
+        is_obb = bboxes.shape[1] == 5  # check if detections contains angle for OBB
+        iou = batch_probiou(gt_bboxes, bboxes) if is_obb else box_iou(gt_bboxes, bboxes)
+
+        x = torch.where(iou > iou_thres)
         if x[0].shape[0]:
             matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
             if x[0].shape[0] > 1:
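Callers now pass predictions and ground truth as dicts and supply the thresholds per call. A minimal sketch with dummy tensors; the key names match the docstring above, but the values are invented:

```python
import torch

from ultralytics.utils.metrics import ConfusionMatrix

cm = ConfusionMatrix(names=["person", "car"])
detections = {
    "bboxes": torch.tensor([[0.0, 0.0, 50.0, 50.0]]),  # xyxy; would be [N, 5] with angle for OBB
    "conf": torch.tensor([0.9]),
    "cls": torch.tensor([0.0]),
}
batch = {  # ground truth
    "bboxes": torch.tensor([[2.0, 2.0, 48.0, 49.0]]),
    "cls": torch.tensor([0.0]),
}
cm.process_batch(detections, batch, conf=0.25, iou_thres=0.45)
```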
@@ -949,53 +947,76 @@ class DetMetrics(SimpleClass, DataExportMixin):
     Utility class for computing detection metrics such as precision, recall, and mean average precision (mAP).
 
     Attributes:
-        save_dir (Path): A path to the directory where the output plots will be saved.
-        plot (bool): A flag that indicates whether to plot precision-recall curves for each class.
         names (Dict[int, str]): A dictionary of class names.
         box (Metric): An instance of the Metric class for storing detection results.
         speed (Dict[str, float]): A dictionary for storing execution times of different parts of the detection process.
         task (str): The task type, set to 'detect'.
+        stats (Dict[str, List]): A dictionary containing lists for true positives, confidence scores, predicted classes, target classes, and target images.
+        nt_per_class (np.ndarray): Number of targets per class.
+        nt_per_image (np.ndarray): Number of targets per image.
     """
 
-    def __init__(self, save_dir: Path = Path("."), plot: bool = False, names: Dict[int, str] = {}) -> None:
+    def __init__(self, names: Dict[int, str] = {}) -> None:
         """
-        Initialize a DetMetrics instance with a save directory, plot flag, and class names.
+        Initialize a DetMetrics instance with class names.
 
         Args:
-            save_dir (Path, optional): Directory to save plots.
-            plot (bool, optional): Whether to plot precision-recall curves.
             names (Dict[int, str], optional): Dictionary of class names.
         """
-        self.save_dir = save_dir
-        self.plot = plot
         self.names = names
         self.box = Metric()
         self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
         self.task = "detect"
+        self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
+        self.nt_per_class = None
+        self.nt_per_image = None
 
-    def process(self, tp: np.ndarray, conf: np.ndarray, pred_cls: np.ndarray, target_cls: np.ndarray, on_plot=None):
+    def update_stats(self, stat: Dict[str, Any]) -> None:
+        """
+        Update statistics by appending new values to existing stat collections.
+
+        Args:
+            stat (Dict[str, Any]): Dictionary containing new statistical values to append.
+                Keys should match existing keys in self.stats.
+        """
+        for k in self.stats.keys():
+            self.stats[k].append(stat[k])
+
+    def process(self, save_dir: Path = Path("."), plot: bool = False, on_plot=None) -> Dict[str, np.ndarray]:
         """
         Process predicted results for object detection and update metrics.
 
         Args:
-            tp (np.ndarray): True positive array.
-            conf (np.ndarray): Confidence array.
-            pred_cls (np.ndarray): Predicted class indices array.
-            target_cls (np.ndarray): Target class indices array.
-            on_plot (callable, optional): Function to call after plots are generated.
+            save_dir (Path): Directory to save plots. Defaults to Path(".").
+            plot (bool): Whether to plot precision-recall curves. Defaults to False.
+            on_plot (callable, optional): Function to call after plots are generated. Defaults to None.
+
+        Returns:
+            (Dict[str, np.ndarray]): Dictionary containing concatenated statistics arrays.
         """
+        stats = {k: np.concatenate(v, 0) for k, v in self.stats.items()}  # to numpy
+        if len(stats) == 0:
+            return stats
         results = ap_per_class(
-            tp,
-            conf,
-            pred_cls,
-            target_cls,
-            plot=self.plot,
-            save_dir=self.save_dir,
+            stats["tp"],
+            stats["conf"],
+            stats["pred_cls"],
+            stats["target_cls"],
+            plot=plot,
+            save_dir=save_dir,
             names=self.names,
             on_plot=on_plot,
         )[2:]
         self.box.nc = len(self.names)
         self.box.update(results)
+        self.nt_per_class = np.bincount(stats["target_cls"].astype(int), minlength=len(self.names))
+        self.nt_per_image = np.bincount(stats["target_img"].astype(int), minlength=len(self.names))
+        return stats
+
+    def clear_stats(self):
+        """Clear the stored statistics."""
+        for v in self.stats.values():
+            v.clear()
 
     @property
     def keys(self) -> List[str]:
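The metrics object now accumulates per-batch statistics itself rather than receiving pre-concatenated arrays, and the plotting options move from `__init__` to `process()`. A rough sketch of the new lifecycle with dummy arrays; shapes are illustrative (`tp` holds one row per detection and one column per IoU threshold):

```python
import numpy as np

from ultralytics.utils.metrics import DetMetrics

dm = DetMetrics(names={0: "person", 1: "car"})
dm.update_stats(  # called once per validation batch
    {
        "tp": np.zeros((3, 10), dtype=bool),
        "conf": np.array([0.9, 0.8, 0.7]),
        "pred_cls": np.array([0.0, 1.0, 0.0]),
        "target_cls": np.array([0.0, 1.0]),
        "target_img": np.array([0.0, 1.0]),
    }
)
stats = dm.process(plot=False)  # concatenates, runs ap_per_class, returns the arrays
dm.clear_stats()  # drop the per-batch lists once metrics are computed
```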
@@ -1077,92 +1098,65 @@ class DetMetrics(SimpleClass, DataExportMixin):
         ]
 
 
-class SegmentMetrics(SimpleClass, DataExportMixin):
+class SegmentMetrics(DetMetrics):
     """
     Calculate and aggregate detection and segmentation metrics over a given set of classes.
 
     Attributes:
-        save_dir (Path): Path to the directory where the output plots should be saved.
-        plot (bool): Whether to save the detection and segmentation plots.
         names (Dict[int, str]): Dictionary of class names.
         box (Metric): An instance of the Metric class for storing detection results.
         seg (Metric): An instance of the Metric class to calculate mask segmentation metrics.
         speed (Dict[str, float]): A dictionary for storing execution times of different parts of the detection process.
         task (str): The task type, set to 'segment'.
+        stats (Dict[str, List]): A dictionary containing lists for true positives, confidence scores, predicted classes, target classes, and target images.
+        nt_per_class (np.ndarray): Number of targets per class.
+        nt_per_image (np.ndarray): Number of targets per image.
     """
 
-    def __init__(self, save_dir: Path = Path("."), plot: bool = False, names: Dict[int, str] = {}) -> None:
+    def __init__(self, names: Dict[int, str] = {}) -> None:
         """
-        Initialize a SegmentMetrics instance with a save directory, plot flag, and class names.
+        Initialize a SegmentMetrics instance with class names.
 
         Args:
-            save_dir (Path, optional): Directory to save plots.
-            plot (bool, optional): Whether to plot precision-recall curves.
             names (Dict[int, str], optional): Dictionary of class names.
         """
-        self.save_dir = save_dir
-        self.plot = plot
-        self.names = names
-        self.box = Metric()
+        DetMetrics.__init__(self, names)
         self.seg = Metric()
-        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
         self.task = "segment"
+        self.stats["tp_m"] = []  # add additional stats for masks
 
-    def process(
-        self,
-        tp: np.ndarray,
-        tp_m: np.ndarray,
-        conf: np.ndarray,
-        pred_cls: np.ndarray,
-        target_cls: np.ndarray,
-        on_plot=None,
-    ):
+    def process(self, save_dir: Path = Path("."), plot: bool = False, on_plot=None) -> Dict[str, np.ndarray]:
         """
         Process the detection and segmentation metrics over the given set of predictions.
 
         Args:
-            tp (np.ndarray): True positive array for boxes.
-            tp_m (np.ndarray): True positive array for masks.
-            conf (np.ndarray): Confidence array.
-            pred_cls (np.ndarray): Predicted class indices array.
-            target_cls (np.ndarray): Target class indices array.
-            on_plot (callable, optional): Function to call after plots are generated.
+            save_dir (Path): Directory to save plots. Defaults to Path(".").
+            plot (bool): Whether to plot precision-recall curves. Defaults to False.
+            on_plot (callable, optional): Function to call after plots are generated. Defaults to None.
+
+        Returns:
+            (Dict[str, np.ndarray]): Dictionary containing concatenated statistics arrays.
         """
+        stats = DetMetrics.process(self, save_dir, plot, on_plot=on_plot)  # process box stats
         results_mask = ap_per_class(
-            tp_m,
-            conf,
-            pred_cls,
-            target_cls,
-            plot=self.plot,
+            stats["tp_m"],
+            stats["conf"],
+            stats["pred_cls"],
+            stats["target_cls"],
+            plot=plot,
             on_plot=on_plot,
-            save_dir=self.save_dir,
+            save_dir=save_dir,
             names=self.names,
             prefix="Mask",
         )[2:]
         self.seg.nc = len(self.names)
         self.seg.update(results_mask)
-        results_box = ap_per_class(
-            tp,
-            conf,
-            pred_cls,
-            target_cls,
-            plot=self.plot,
-            on_plot=on_plot,
-            save_dir=self.save_dir,
-            names=self.names,
-            prefix="Box",
-        )[2:]
-        self.box.nc = len(self.names)
-        self.box.update(results_box)
+        return stats
 
     @property
     def keys(self) -> List[str]:
         """Return a list of keys for accessing metrics."""
-        return [
-            "metrics/precision(B)",
-            "metrics/recall(B)",
-            "metrics/mAP50(B)",
-            "metrics/mAP50-95(B)",
+        return DetMetrics.keys.fget(self) + [
             "metrics/precision(M)",
             "metrics/recall(M)",
             "metrics/mAP50(M)",
@@ -1171,40 +1165,26 @@ class SegmentMetrics(SimpleClass, DataExportMixin):
 
     def mean_results(self) -> List[float]:
         """Return the mean metrics for bounding box and segmentation results."""
-        return self.box.mean_results() + self.seg.mean_results()
+        return DetMetrics.mean_results(self) + self.seg.mean_results()
 
     def class_result(self, i: int) -> List[float]:
         """Return classification results for a specified class index."""
-        return self.box.class_result(i) + self.seg.class_result(i)
+        return DetMetrics.class_result(self, i) + self.seg.class_result(i)
 
     @property
     def maps(self) -> np.ndarray:
         """Return mAP scores for object detection and semantic segmentation models."""
-        return self.box.maps + self.seg.maps
+        return DetMetrics.maps.fget(self) + self.seg.maps
 
     @property
     def fitness(self) -> float:
         """Return the fitness score for both segmentation and bounding box models."""
-        return self.seg.fitness() + self.box.fitness()
-
-    @property
-    def ap_class_index(self) -> List:
-        """Return the class indices (boxes and masks have the same ap_class_index)."""
-        return self.box.ap_class_index
-
-    @property
-    def results_dict(self) -> Dict[str, float]:
-        """Return results of object detection model for evaluation."""
-        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))
+        return self.seg.fitness() + DetMetrics.fitness.fget(self)
 
     @property
     def curves(self) -> List[str]:
         """Return a list of curves for accessing specific metrics curves."""
-        return [
-            "Precision-Recall(B)",
-            "F1-Confidence(B)",
-            "Precision-Confidence(B)",
-            "Recall-Confidence(B)",
+        return DetMetrics.curves.fget(self) + [
             "Precision-Recall(M)",
             "F1-Confidence(M)",
             "Precision-Confidence(M)",
@@ -1214,7 +1194,7 @@ class SegmentMetrics(SimpleClass, DataExportMixin):
     @property
     def curves_results(self) -> List[List]:
         """Return dictionary of computed performance metrics and statistics."""
-        return self.box.curves_results + self.seg.curves_results
+        return DetMetrics.curves_results.fget(self) + self.seg.curves_results
 
     def summary(self, normalize: bool = True, decimals: int = 5) -> List[Dict[str, Union[str, float]]]:
         """
@@ -1234,43 +1214,34 @@ class SegmentMetrics(SimpleClass, DataExportMixin):
             >>> print(seg_summary)
         """
         scalars = {
-            "box-map": round(self.box.map, decimals),
-            "box-map50": round(self.box.map50, decimals),
-            "box-map75": round(self.box.map75, decimals),
             "mask-map": round(self.seg.map, decimals),
             "mask-map50": round(self.seg.map50, decimals),
             "mask-map75": round(self.seg.map75, decimals),
         }
         per_class = {
-            "box-p": self.box.p,
-            "box-r": self.box.r,
-            "box-f1": self.box.f1,
             "mask-p": self.seg.p,
             "mask-r": self.seg.r,
             "mask-f1": self.seg.f1,
         }
-        return [
-            {
-                "class_name": self.names[self.ap_class_index[i]],
-                **{k: round(v[i], decimals) for k, v in per_class.items()},
-                **scalars,
-            }
-            for i in range(len(per_class["box-p"]))
-        ]
+        summary = DetMetrics.summary(self, normalize, decimals)  # get box summary
+        for i, s in enumerate(summary):
+            s.update({**{k: round(v[i], decimals) for k, v in per_class.items()}, **scalars})
+        return summary
 
 
-class PoseMetrics(SegmentMetrics):
+class PoseMetrics(DetMetrics):
     """
     Calculate and aggregate detection and pose metrics over a given set of classes.
 
     Attributes:
-        save_dir (Path): Path to the directory where the output plots should be saved.
-        plot (bool): Whether to save the detection and pose plots.
         names (Dict[int, str]): Dictionary of class names.
         pose (Metric): An instance of the Metric class to calculate pose metrics.
         box (Metric): An instance of the Metric class for storing detection results.
         speed (Dict[str, float]): A dictionary for storing execution times of different parts of the detection process.
         task (str): The task type, set to 'pose'.
+        stats (Dict[str, List]): A dictionary containing lists for true positives, confidence scores, predicted classes, target classes, and target images.
+        nt_per_class (np.ndarray): Number of targets per class.
+        nt_per_image (np.ndarray): Number of targets per image.
 
     Methods:
-        process(tp_m, tp_b, conf, pred_cls, target_cls): Process metrics over the given set of predictions.
+        process(save_dir, plot, on_plot): Process metrics over the given set of predictions.
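The `summary()` override follows the same delegation pattern: it takes the per-class box rows from `DetMetrics.summary()` and updates each row in place with the mask (or pose) columns. One merged segmentation row might look like this sketch; all numbers are invented:

```python
# One merged per-class row after SegmentMetrics.summary() (values illustrative only)
row = {
    "class_name": "person",
    "box-p": 0.81, "box-r": 0.72, "box-f1": 0.76,
    "box-map": 0.61, "box-map50": 0.83, "box-map75": 0.66,
    "mask-p": 0.78, "mask-r": 0.66, "mask-f1": 0.71,
    "mask-map": 0.55, "mask-map50": 0.75, "mask-map75": 0.60,
}
```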
@@ -1282,79 +1253,50 @@ class PoseMetrics(SegmentMetrics):
         results_dict: Return the dictionary containing all the detection and segmentation metrics and fitness score.
     """
 
-    def __init__(self, save_dir: Path = Path("."), plot: bool = False, names: Dict[int, str] = {}) -> None:
+    def __init__(self, names: Dict[int, str] = {}) -> None:
         """
-        Initialize the PoseMetrics class with directory path, class names, and plotting options.
+        Initialize the PoseMetrics class with class names.
 
         Args:
-            save_dir (Path, optional): Directory to save plots.
-            plot (bool, optional): Whether to plot precision-recall curves.
             names (Dict[int, str], optional): Dictionary of class names.
         """
-        super().__init__(save_dir, plot, names)
-        self.save_dir = save_dir
-        self.plot = plot
-        self.names = names
-        self.box = Metric()
+        super().__init__(names)
         self.pose = Metric()
-        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
         self.task = "pose"
+        self.stats["tp_p"] = []  # add additional stats for pose
 
-    def process(
-        self,
-        tp: np.ndarray,
-        tp_p: np.ndarray,
-        conf: np.ndarray,
-        pred_cls: np.ndarray,
-        target_cls: np.ndarray,
-        on_plot=None,
-    ):
+    def process(self, save_dir: Path = Path("."), plot: bool = False, on_plot=None) -> Dict[str, np.ndarray]:
         """
         Process the detection and pose metrics over the given set of predictions.
 
         Args:
-            tp (np.ndarray): True positive array for boxes.
-            tp_p (np.ndarray): True positive array for keypoints.
-            conf (np.ndarray): Confidence array.
-            pred_cls (np.ndarray): Predicted class indices array.
-            target_cls (np.ndarray): Target class indices array.
+            save_dir (Path): Directory to save plots. Defaults to Path(".").
+            plot (bool): Whether to plot precision-recall curves. Defaults to False.
             on_plot (callable, optional): Function to call after plots are generated.
+
+        Returns:
+            (Dict[str, np.ndarray]): Dictionary containing concatenated statistics arrays.
         """
+        stats = DetMetrics.process(self, save_dir, plot, on_plot=on_plot)  # process box stats
         results_pose = ap_per_class(
-            tp_p,
-            conf,
-            pred_cls,
-            target_cls,
-            plot=self.plot,
+            stats["tp_p"],
+            stats["conf"],
+            stats["pred_cls"],
+            stats["target_cls"],
+            plot=plot,
             on_plot=on_plot,
-            save_dir=self.save_dir,
+            save_dir=save_dir,
             names=self.names,
             prefix="Pose",
         )[2:]
         self.pose.nc = len(self.names)
         self.pose.update(results_pose)
-        results_box = ap_per_class(
-            tp,
-            conf,
-            pred_cls,
-            target_cls,
-            plot=self.plot,
-            on_plot=on_plot,
-            save_dir=self.save_dir,
-            names=self.names,
-            prefix="Box",
-        )[2:]
-        self.box.nc = len(self.names)
-        self.box.update(results_box)
+        return stats
 
     @property
     def keys(self) -> List[str]:
         """Return list of evaluation metric keys."""
-        return [
-            "metrics/precision(B)",
-            "metrics/recall(B)",
-            "metrics/mAP50(B)",
-            "metrics/mAP50-95(B)",
+        return DetMetrics.keys.fget(self) + [
             "metrics/precision(P)",
             "metrics/recall(P)",
             "metrics/mAP50(P)",
@@ -1363,26 +1305,26 @@ class PoseMetrics(SegmentMetrics):
 
     def mean_results(self) -> List[float]:
         """Return the mean results of box and pose."""
-        return self.box.mean_results() + self.pose.mean_results()
+        return DetMetrics.mean_results(self) + self.pose.mean_results()
 
     def class_result(self, i: int) -> List[float]:
         """Return the class-wise detection results for a specific class i."""
-        return self.box.class_result(i) + self.pose.class_result(i)
+        return DetMetrics.class_result(self, i) + self.pose.class_result(i)
 
     @property
     def maps(self) -> np.ndarray:
         """Return the mean average precision (mAP) per class for both box and pose detections."""
-        return self.box.maps + self.pose.maps
+        return DetMetrics.maps.fget(self) + self.pose.maps
 
     @property
     def fitness(self) -> float:
         """Return combined fitness score for pose and box detection."""
-        return self.pose.fitness() + self.box.fitness()
+        return self.pose.fitness() + DetMetrics.fitness.fget(self)
 
     @property
     def curves(self) -> List[str]:
         """Return a list of curves for accessing specific metrics curves."""
-        return [
+        return DetMetrics.curves.fget(self) + [
             "Precision-Recall(B)",
             "F1-Confidence(B)",
             "Precision-Confidence(B)",
@@ -1396,7 +1338,7 @@ class PoseMetrics(SegmentMetrics):
     @property
     def curves_results(self) -> List[List]:
         """Return dictionary of computed performance metrics and statistics."""
-        return self.box.curves_results + self.pose.curves_results
+        return DetMetrics.curves_results.fget(self) + self.pose.curves_results
 
     def summary(self, normalize: bool = True, decimals: int = 5) -> List[Dict[str, Union[str, float]]]:
         """
@@ -1416,29 +1358,19 @@ class PoseMetrics(SegmentMetrics):
             >>> print(pose_summary)
         """
         scalars = {
-            "box-map": round(self.box.map, decimals),
-            "box-map50": round(self.box.map50, decimals),
-            "box-map75": round(self.box.map75, decimals),
             "pose-map": round(self.pose.map, decimals),
             "pose-map50": round(self.pose.map50, decimals),
             "pose-map75": round(self.pose.map75, decimals),
         }
         per_class = {
-            "box-p": self.box.p,
-            "box-r": self.box.r,
-            "box-f1": self.box.f1,
            "pose-p": self.pose.p,
            "pose-r": self.pose.r,
            "pose-f1": self.pose.f1,
         }
-        return [
-            {
-                "class_name": self.names[self.ap_class_index[i]],
-                **{k: round(v[i], decimals) for k, v in per_class.items()},
-                **scalars,
-            }
-            for i in range(len(per_class["box-p"]))
-        ]
+        summary = DetMetrics.summary(self, normalize, decimals)  # get box summary
+        for i, s in enumerate(summary):
+            s.update({**{k: round(v[i], decimals) for k, v in per_class.items()}, **scalars})
+        return summary
 
 
 class ClassifyMetrics(SimpleClass, DataExportMixin):
@@ -1516,133 +1448,30 @@ class ClassifyMetrics(SimpleClass, DataExportMixin):
         return [{"classify-top1": round(self.top1, decimals), "classify-top5": round(self.top5, decimals)}]
 
 
-class OBBMetrics(SimpleClass, DataExportMixin):
+class OBBMetrics(DetMetrics):
     """
     Metrics for evaluating oriented bounding box (OBB) detection.
 
     Attributes:
-        save_dir (Path): Path to the directory where the output plots should be saved.
-        plot (bool): Whether to save the detection plots.
         names (Dict[int, str]): Dictionary of class names.
         box (Metric): An instance of the Metric class for storing detection results.
         speed (Dict[str, float]): A dictionary for storing execution times of different parts of the detection process.
         task (str): The task type, set to 'obb'.
+        stats (Dict[str, List]): A dictionary containing lists for true positives, confidence scores, predicted classes, target classes, and target images.
+        nt_per_class (np.ndarray): Number of targets per class.
+        nt_per_image (np.ndarray): Number of targets per image.
 
     References:
         https://arxiv.org/pdf/2106.06072.pdf
     """
 
-    def __init__(self, save_dir: Path = Path("."), plot: bool = False, names: Dict[int, str] = {}) -> None:
+    def __init__(self, names: Dict[int, str] = {}) -> None:
         """
-        Initialize an OBBMetrics instance with directory, plotting, and class names.
+        Initialize an OBBMetrics instance with class names.
 
         Args:
-            save_dir (Path, optional): Directory to save plots.
-            plot (bool, optional): Whether to plot precision-recall curves.
             names (Dict[int, str], optional): Dictionary of class names.
         """
-        self.save_dir = save_dir
-        self.plot = plot
-        self.names = names
-        self.box = Metric()
-        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
+        DetMetrics.__init__(self, names)
+        # TODO: probably remove task as well
         self.task = "obb"
-
-    def process(self, tp: np.ndarray, conf: np.ndarray, pred_cls: np.ndarray, target_cls: np.ndarray, on_plot=None):
-        """
-        Process predicted results for object detection and update metrics.
-
-        Args:
-            tp (np.ndarray): True positive array.
-            conf (np.ndarray): Confidence array.
-            pred_cls (np.ndarray): Predicted class indices array.
-            target_cls (np.ndarray): Target class indices array.
-            on_plot (callable, optional): Function to call after plots are generated.
-        """
-        results = ap_per_class(
-            tp,
-            conf,
-            pred_cls,
-            target_cls,
-            plot=self.plot,
-            save_dir=self.save_dir,
-            names=self.names,
-            on_plot=on_plot,
-        )[2:]
-        self.box.nc = len(self.names)
-        self.box.update(results)
-
-    @property
-    def keys(self) -> List[str]:
-        """Return a list of keys for accessing specific metrics."""
-        return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"]
-
-    def mean_results(self) -> List[float]:
-        """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95."""
-        return self.box.mean_results()
-
-    def class_result(self, i: int) -> Tuple[float, float, float, float]:
-        """Return the result of evaluating the performance of an object detection model on a specific class."""
-        return self.box.class_result(i)
-
-    @property
-    def maps(self) -> np.ndarray:
-        """Return mean Average Precision (mAP) scores per class."""
-        return self.box.maps
-
-    @property
-    def fitness(self) -> float:
-        """Return the fitness of box object."""
-        return self.box.fitness()
-
-    @property
-    def ap_class_index(self) -> List:
-        """Return the average precision index per class."""
-        return self.box.ap_class_index
-
-    @property
-    def results_dict(self) -> Dict[str, float]:
-        """Return dictionary of computed performance metrics and statistics."""
-        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))
-
-    @property
-    def curves(self) -> List:
-        """Return a list of curves for accessing specific metrics curves."""
-        return []
-
-    @property
-    def curves_results(self) -> List:
-        """Return a list of curves for accessing specific metrics curves."""
-        return []
-
-    def summary(self, normalize: bool = True, decimals: int = 5) -> List[Dict[str, Union[str, float]]]:
-        """
-        Generate a summarized representation of per-class detection metrics as a list of dictionaries. Includes shared
-        scalar metrics (mAP, mAP50, mAP75) along with precision, recall, and F1-score for each class.
-
-        Args:
-            normalize (bool): For OBB metrics, everything is normalized by default [0-1].
-            decimals (int): Number of decimal places to round the metrics values to.
-
-        Returns:
-            (List[Dict[str, Union[str, float]]]): A list of dictionaries, each representing one class with detection metrics.
-
-        Examples:
-            >>> results = model.val(data="dota8.yaml")
-            >>> detection_summary = results.summary(decimals=4)
-            >>> print(detection_summary)
-        """
-        scalars = {
-            "box-map": round(self.box.map, decimals),
-            "box-map50": round(self.box.map50, decimals),
-            "box-map75": round(self.box.map75, decimals),
-        }
-        per_class = {"box-p": self.box.p, "box-r": self.box.r, "box-f1": self.box.f1}
-        return [
-            {
-                "class_name": self.names[self.ap_class_index[i]],
-                **{k: round(v[i], decimals) for k, v in per_class.items()},
-                **scalars,
-            }
-            for i in range(len(per_class["box-p"]))
-        ]
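With the shared behaviour hoisted into `DetMetrics`, `OBBMetrics` shrinks to a constructor that sets its task name; everything else (keys, `mean_results`, `summary`, export) is inherited unchanged. A quick sketch:

```python
from ultralytics.utils.metrics import DetMetrics, OBBMetrics

obb = OBBMetrics(names={0: "plane", 1: "ship"})
assert isinstance(obb, DetMetrics)
print(obb.task)  # 'obb'
print(obb.keys)  # the same four '(B)' keys as DetMetrics
```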
@@ -255,7 +255,7 @@ def non_max_suppression(
 
     bs = prediction.shape[0]  # batch size (BCN, i.e. 1,84,6300)
     nc = nc or (prediction.shape[1] - 4)  # number of classes
-    nm = prediction.shape[1] - nc - 4  # number of masks
+    extra = prediction.shape[1] - nc - 4  # number of extra channels (e.g. mask coefficients or OBB angle)
     mi = 4 + nc  # mask start index
     xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
     xinds = torch.stack([torch.arange(len(i), device=prediction.device) for i in xc])[..., None]  # to track idxs
@@ -273,7 +273,7 @@
             prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy
 
     t = time.time()
-    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
+    output = [torch.zeros((0, 6 + extra), device=prediction.device)] * bs
     keepi = [torch.zeros((0, 1), device=prediction.device)] * bs  # to store the kept idxs
     for xi, (x, xk) in enumerate(zip(prediction, xinds)):  # image index, (preds, preds indices)
         # Apply constraints
@@ -284,7 +284,7 @@
         # Cat apriori labels if autolabelling
         if labels and len(labels[xi]) and not rotated:
             lb = labels[xi]
-            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
+            v = torch.zeros((len(lb), nc + extra + 4), device=x.device)
             v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
             v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
             x = torch.cat((x, v), 0)
@@ -294,7 +294,7 @@
             continue
 
         # Detections matrix nx6 (xyxy, conf, cls)
-        box, cls, mask = x.split((4, nc, nm), 1)
+        box, cls, mask = x.split((4, nc, extra), 1)
 
         if multi_label:
             i, j = torch.where(cls > conf_thres)
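The `nm` to `extra` rename is purely cosmetic: the channels trailing the class scores are mask coefficients for segmentation but the rotation angle for OBB, so the neutral name fits both. Each kept row remains `(x1, y1, x2, y2, conf, cls, *extra)`; a small sketch of splitting such rows back apart, with invented sizes:

```python
import torch

nc, extra = 80, 32  # e.g. 32 mask coefficients from a segmentation head
kept = torch.zeros((5, 6 + extra))  # 5 detections surviving NMS
boxes, conf, cls, extra_info = kept.split((4, 1, 1, extra), dim=1)
print(boxes.shape, conf.shape, cls.shape, extra_info.shape)
# torch.Size([5, 4]) torch.Size([5, 1]) torch.Size([5, 1]) torch.Size([5, 32])
```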
@@ -3,7 +3,7 @@
 import math
 import warnings
 from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 
 import cv2
 import numpy as np
@@ -678,13 +678,8 @@ def save_one_box(
 
 @threaded
 def plot_images(
-    images: Union[torch.Tensor, np.ndarray],
-    batch_idx: Union[torch.Tensor, np.ndarray],
-    cls: Union[torch.Tensor, np.ndarray],
-    bboxes: Union[torch.Tensor, np.ndarray] = np.zeros(0, dtype=np.float32),
-    confs: Optional[Union[torch.Tensor, np.ndarray]] = None,
-    masks: Union[torch.Tensor, np.ndarray] = np.zeros(0, dtype=np.uint8),
-    kpts: Union[torch.Tensor, np.ndarray] = np.zeros((0, 51), dtype=np.float32),
+    labels: Dict[str, Any],
+    images: Union[torch.Tensor, np.ndarray] = np.zeros((0, 3, 640, 640), dtype=np.float32),
     paths: Optional[List[str]] = None,
     fname: str = "images.jpg",
     names: Optional[Dict[int, str]] = None,
@@ -698,21 +693,16 @@
     Plot image grid with labels, bounding boxes, masks, and keypoints.
 
     Args:
-        images: Batch of images to plot. Shape: (batch_size, channels, height, width).
-        batch_idx: Batch indices for each detection. Shape: (num_detections,).
-        cls: Class labels for each detection. Shape: (num_detections,).
-        bboxes: Bounding boxes for each detection. Shape: (num_detections, 4) or (num_detections, 5) for rotated boxes.
-        confs: Confidence scores for each detection. Shape: (num_detections,).
-        masks: Instance segmentation masks. Shape: (num_detections, height, width) or (1, height, width).
-        kpts: Keypoints for each detection. Shape: (num_detections, 51).
-        paths: List of file paths for each image in the batch.
-        fname: Output filename for the plotted image grid.
-        names: Dictionary mapping class indices to class names.
-        on_plot: Optional callback function to be called after saving the plot.
-        max_size: Maximum size of the output image grid.
-        max_subplots: Maximum number of subplots in the image grid.
-        save: Whether to save the plotted image grid to a file.
-        conf_thres: Confidence threshold for displaying detections.
+        labels (Dict[str, Any]): Dictionary containing detection data with keys like 'cls', 'bboxes', 'conf', 'masks', 'keypoints', 'batch_idx', 'img'.
+        images (Union[torch.Tensor, np.ndarray]): Batch of images to plot. Shape: (batch_size, channels, height, width).
+        paths (Optional[List[str]]): List of file paths for each image in the batch.
+        fname (str): Output filename for the plotted image grid.
+        names (Optional[Dict[int, str]]): Dictionary mapping class indices to class names.
+        on_plot (Optional[Callable]): Optional callback function to be called after saving the plot.
+        max_size (int): Maximum size of the output image grid.
+        max_subplots (int): Maximum number of subplots in the image grid.
+        save (bool): Whether to save the plotted image grid to a file.
+        conf_thres (float): Confidence threshold for displaying detections.
 
     Returns:
         (np.ndarray): Plotted image grid as a numpy array if save is False, None otherwise.
@@ -721,18 +711,24 @@
         This function supports both tensor and numpy array inputs. It will automatically
         convert tensor inputs to numpy arrays for processing.
     """
-    if isinstance(images, torch.Tensor):
+    for k in {"cls", "bboxes", "conf", "masks", "keypoints", "batch_idx", "images"}:
+        if k not in labels:
+            continue
+        if k == "cls" and labels[k].ndim == 2:
+            labels[k] = labels[k].squeeze(1)  # squeeze if shape is (n, 1)
+        if isinstance(labels[k], torch.Tensor):
+            labels[k] = labels[k].cpu().numpy()
+
+    cls = labels.get("cls", np.zeros(0, dtype=np.int64))
+    batch_idx = labels.get("batch_idx", np.zeros(cls.shape, dtype=np.int64))
+    bboxes = labels.get("bboxes", np.zeros(0, dtype=np.float32))
+    confs = labels.get("conf", None)
+    masks = labels.get("masks", np.zeros(0, dtype=np.uint8))
+    kpts = labels.get("keypoints", np.zeros(0, dtype=np.float32))
+    images = labels.get("img", images)  # default to input images
+
+    if len(images) and isinstance(images, torch.Tensor):
         images = images.cpu().float().numpy()
-    if isinstance(cls, torch.Tensor):
-        cls = cls.cpu().numpy()
-    if isinstance(bboxes, torch.Tensor):
-        bboxes = bboxes.cpu().numpy()
-    if isinstance(masks, torch.Tensor):
-        masks = masks.cpu().numpy().astype(int)
-    if isinstance(kpts, torch.Tensor):
-        kpts = kpts.cpu().numpy()
-    if isinstance(batch_idx, torch.Tensor):
-        batch_idx = batch_idx.cpu().numpy()
     if images.shape[1] > 3:
         images = images[:, :3]  # crop multispectral images to first 3 channels
 
@@ -781,6 +777,7 @@
                 boxes[..., 0] += x
                 boxes[..., 1] += y
                 is_obb = boxes.shape[-1] == 5  # xywhr
+                # TODO: this transformation might be unnecessary
                 boxes = ops.xywhr2xyxyxyxy(boxes) if is_obb else ops.xywh2xyxy(boxes)
                 for j, box in enumerate(boxes.astype(np.int64).tolist()):
                     c = classes[j]
@@ -1004,28 +1001,6 @@ def plot_tune_results(csv_file: str = "tune_results.csv"):
     _save_one_file(csv_file.with_name("tune_fitness.png"))
 
 
-def output_to_target(output, max_det: int = 300):
-    """Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting."""
-    targets = []
-    for i, o in enumerate(output):
-        box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
-        j = torch.full((conf.shape[0], 1), i)
-        targets.append(torch.cat((j, cls, ops.xyxy2xywh(box), conf), 1))
-    targets = torch.cat(targets, 0).numpy()
-    return targets[:, 0], targets[:, 1], targets[:, 2:-1], targets[:, -1]
-
-
-def output_to_rotated_target(output, max_det: int = 300):
-    """Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting."""
-    targets = []
-    for i, o in enumerate(output):
-        box, conf, cls, angle = o[:max_det].cpu().split((4, 1, 1, 1), 1)
-        j = torch.full((conf.shape[0], 1), i)
-        targets.append(torch.cat((j, cls, box, angle, conf), 1))
-    targets = torch.cat(targets, 0).numpy()
-    return targets[:, 0], targets[:, 1], targets[:, 2:-1], targets[:, -1]
-
-
 def feature_visualization(x, module_type: str, stage: int, n: int = 32, save_dir: Path = Path("runs/detect/exp")):
     """
     Visualize feature maps of a given model module during inference.
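Call sites now hand `plot_images` the whole batch dict, which also removes the need for the deleted `output_to_target` helpers: tensors are converted to numpy internally, an `(n, 1)` class column is squeezed automatically, and an `'img'` entry in the dict takes precedence over the `images` argument. A minimal sketch with random data (all values invented):

```python
import torch

from ultralytics.utils.plotting import plot_images

labels = {
    "img": torch.rand(2, 3, 64, 64),  # 2-image batch
    "batch_idx": torch.tensor([0.0, 1.0]),  # which image each box belongs to
    "cls": torch.tensor([[0.0], [1.0]]),  # (n, 1) is squeezed to (n,)
    "bboxes": torch.tensor([[32.0, 32.0, 16.0, 16.0], [20.0, 20.0, 10.0, 10.0]]),  # xywh
    "conf": torch.tensor([0.9, 0.8]),
}
plot_images(labels, names={0: "person", 1: "car"}, fname="batch0.jpg")
```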