#561 Feature/sg 193 extend output formator

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:feature/SG-193-extend_detection_target_transform
27 changed files with 506 additions and 230 deletions
  1. src/super_gradients/common/factories/bbox_format_factory.py (+1, -1)
  2. src/super_gradients/common/factories/data_formats_factory.py (+7, -0)
  3. src/super_gradients/common/object_names.py (+15, -1)
  4. src/super_gradients/recipes/dataset_params/coco_detection_dataset_params.yaml (+4, -6)
  5. src/super_gradients/recipes/dataset_params/coco_detection_ssd_lite_mobilenet_v2_dataset_params.yaml (+5, -6)
  6. src/super_gradients/training/datasets/data_formats/__init__.py (+5, -0)
  7. src/super_gradients/training/datasets/data_formats/bbox_formats/__init__.py (+0, -0)
  8. src/super_gradients/training/datasets/data_formats/bbox_formats/bbox_format.py (+0, -0)
  9. src/super_gradients/training/datasets/data_formats/bbox_formats/cxcywh.py (+1, -1)
  10. src/super_gradients/training/datasets/data_formats/bbox_formats/normalized_cxcywh.py (+3, -3)
  11. src/super_gradients/training/datasets/data_formats/bbox_formats/normalized_xywh.py (+3, -3)
  12. src/super_gradients/training/datasets/data_formats/bbox_formats/normalized_xyxy.py (+1, -1)
  13. src/super_gradients/training/datasets/data_formats/bbox_formats/xywh.py (+1, -1)
  14. src/super_gradients/training/datasets/data_formats/bbox_formats/xyxy.py (+1, -1)
  15. src/super_gradients/training/datasets/data_formats/bbox_formats/yxyx.py (+1, -1)
  16. src/super_gradients/training/datasets/data_formats/default_formats.py (+103, -0)
  17. src/super_gradients/training/datasets/data_formats/format_converter.py (+63, -0)
  18. src/super_gradients/training/datasets/data_formats/formats.py (+173, -0)
  19. src/super_gradients/training/datasets/data_formats/output_adapters/__init__.py (+3, -0)
  20. src/super_gradients/training/datasets/data_formats/output_adapters/detection_adapter.py (+4, -4)
  21. src/super_gradients/training/transforms/all_transforms.py (+0, -2)
  22. src/super_gradients/training/transforms/transforms.py (+42, -65)
  23. src/super_gradients/training/utils/output_adapters/__init__.py (+0, -4)
  24. src/super_gradients/training/utils/output_adapters/formats.py (+0, -78)
  25. tests/unit_tests/bbox_formats_test.py (+8, -7)
  26. tests/unit_tests/detection_output_adapter_test.py (+8, -3)
  27. tests/unit_tests/detection_targets_format_transform_test.py (+54, -42)
src/super_gradients/common/factories/bbox_format_factory.py

@@ -1,5 +1,5 @@
 from super_gradients.common.factories.base_factory import BaseFactory
-from super_gradients.training.utils.bbox_formats import BBOX_FORMATS
+from super_gradients.training.datasets.data_formats.bbox_formats import BBOX_FORMATS


 class BBoxFormatFactory(BaseFactory):
src/super_gradients/common/factories/data_formats_factory.py (new file)

from super_gradients.common.factories.type_factory import TypeFactory
from super_gradients.training.datasets.data_formats.default_formats import DEFAULT_CONCATENATED_TENSOR_FORMATS


class ConcatenatedTensorFormatFactory(TypeFactory):
    def __init__(self):
        super().__init__(DEFAULT_CONCATENATED_TENSOR_FORMATS)
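For context: this new factory wires the format registry into the same factory machinery used elsewhere in the library, which is what lets the recipe files and transforms changed later in this PR accept plain strings such as "LABEL_CXCYWH" in place of format objects. A minimal sketch of the lookup, assuming TypeFactory exposes the usual BaseFactory-style get():

from super_gradients.common.factories.data_formats_factory import ConcatenatedTensorFormatFactory

factory = ConcatenatedTensorFormatFactory()
# Assumption: get() resolves a registered string key to the corresponding
# ConcatenatedTensorFormat object, as with other SuperGradients factories.
label_cxcywh = factory.get("LABEL_CXCYWH")
print(label_cxcywh.num_channels)  # 5: one label column plus four bbox columns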
src/super_gradients/common/object_names.py

@@ -50,7 +50,6 @@ class Transforms:
     DetectionHSV = "DetectionHSV"
     DetectionHorizontalFlip = "DetectionHorizontalFlip"
     DetectionPaddedRescale = "DetectionPaddedRescale"
-    DetectionTargetsFormat = "DetectionTargetsFormat"
     DetectionTargetsFormatTransform = "DetectionTargetsFormatTransform"
     RandomResizedCropAndInterpolation = "RandomResizedCropAndInterpolation"
     RandAugmentTransform = "RandAugmentTransform"
@@ -253,3 +252,18 @@ class Models:
     PP_LITE_B_SEG75 = "pp_lite_b_seg75"
     UNET_CUSTOM = "unet_custom"
     UNET_CUSTOM_CLS = "unet_custom_cls"
+
+
+class ConcatenatedTensorFormats:
+    XYXY_LABEL = "XYXY_LABEL"
+    XYWH_LABEL = "XYWH_LABEL"
+    CXCYWH_LABEL = "CXCYWH_LABEL"
+    LABEL_XYXY = "LABEL_XYXY"
+    LABEL_XYWH = "LABEL_XYWH"
+    LABEL_CXCYWH = "LABEL_CXCYWH"
+    NORMALIZED_XYXY_LABEL = "NORMALIZED_XYXY_LABEL"
+    NORMALIZED_XYWH_LABEL = "NORMALIZED_XYWH_LABEL"
+    NORMALIZED_CXCYWH_LABEL = "NORMALIZED_CXCYWH_LABEL"
+    LABEL_NORMALIZED_XYXY = "LABEL_NORMALIZED_XYXY"
+    LABEL_NORMALIZED_XYWH = "LABEL_NORMALIZED_XYWH"
+    LABEL_NORMALIZED_CXCYWH = "LABEL_NORMALIZED_CXCYWH"
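These constants mirror the keys of DEFAULT_CONCATENATED_TENSOR_FORMATS (defined in default_formats.py below), so callers can name a format without importing the format objects themselves. A small sketch:

from super_gradients.common.object_names import ConcatenatedTensorFormats
from super_gradients.training.datasets.data_formats.default_formats import get_default_data_format

# The constant is just the registry key, so these two lookups are equivalent.
fmt = get_default_data_format(ConcatenatedTensorFormats.LABEL_CXCYWH)
fmt = get_default_data_format("LABEL_CXCYWH")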
src/super_gradients/recipes/dataset_params/coco_detection_dataset_params.yaml

@@ -35,9 +35,8 @@ train_dataset_params:
         input_dim: ${dataset_params.train_dataset_params.input_dim}
         max_targets: 120
     - DetectionTargetsFormatTransform:
-        output_format:
-          _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat # targets format
-          value: LABEL_CXCYWH
+        image_shape: ${dataset_params.train_dataset_params.input_dim}
+        output_format: LABEL_CXCYWH
   tight_box_rotation: False
   class_inclusion_list:
   max_num_samples:
@@ -70,10 +69,9 @@ val_dataset_params:
   - DetectionPaddedRescale:
       input_dim: ${dataset_params.val_dataset_params.input_dim}
   - DetectionTargetsFormatTransform:
+      image_shape: ${dataset_params.val_dataset_params.input_dim}
       max_targets: 50
-      output_format:
-        _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat # targets format
-        value: LABEL_CXCYWH
+      output_format: LABEL_CXCYWH
   tight_box_rotation: False
   class_inclusion_list:
   max_num_samples:
src/super_gradients/recipes/dataset_params/coco_detection_ssd_lite_mobilenet_v2_dataset_params.yaml

@@ -30,9 +30,9 @@ train_dataset_params:
         input_dim: ${dataset_params.train_dataset_params.input_dim}
         max_targets: 120
     - DetectionTargetsFormatTransform:
-        output_format:
-          _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat
-          value: LABEL_NORMALIZED_CXCYWH
+        image_shape: ${dataset_params.train_dataset_params.input_dim}
+        max_targets: 50
+        output_format: LABEL_NORMALIZED_CXCYWH

   tight_box_rotation: False
   class_inclusion_list:
@@ -65,10 +65,9 @@ val_dataset_params:
     - DetectionPaddedRescale:
         input_dim: ${dataset_params.val_dataset_params.input_dim}
     - DetectionTargetsFormatTransform:
+        image_shape: ${dataset_params.val_dataset_params.input_dim}
         max_targets: 50
-        output_format:
-          _target_: super_gradients.training.utils.detection_utils.DetectionTargetsFormat
-          value: LABEL_NORMALIZED_CXCYWH
+        output_format: LABEL_NORMALIZED_CXCYWH
   tight_box_rotation: False
   class_inclusion_list:
   max_num_samples:
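The recipe entries above are equivalent to constructing the transform directly; the output_format string is resolved by ConcatenatedTensorFormatFactory via the @resolve_param decorators added in transforms.py further down. A hedged sketch of the same configuration in Python, with (640, 640) standing in for the interpolated input_dim:

from super_gradients.training.transforms.transforms import DetectionTargetsFormatTransform

# (640, 640) is only an illustrative value for
# ${dataset_params.val_dataset_params.input_dim}.
transform = DetectionTargetsFormatTransform(
    image_shape=(640, 640),
    max_targets=50,
    output_format="LABEL_CXCYWH",  # string resolved to the LABEL_CXCYWH format object
)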
src/super_gradients/training/datasets/data_formats/__init__.py (new file)

from .format_converter import ConcatenatedTensorFormatConverter
from .output_adapters import DetectionOutputAdapter
from .formats import ConcatenatedTensorFormat, BoundingBoxesTensorSliceItem, TensorSliceItem

__all__ = ["ConcatenatedTensorFormatConverter", "DetectionOutputAdapter", "TensorSliceItem", "ConcatenatedTensorFormat", "BoundingBoxesTensorSliceItem"]
src/super_gradients/training/datasets/data_formats/bbox_formats/cxcywh.py

@@ -4,7 +4,7 @@ from typing import Tuple
 import numpy as np
 import torch

-from super_gradients.training.utils.bbox_formats.bbox_format import (
+from super_gradients.training.datasets.data_formats.bbox_formats.bbox_format import (
     BoundingBoxFormat,
 )

src/super_gradients/training/datasets/data_formats/bbox_formats/normalized_cxcywh.py

@@ -1,10 +1,10 @@
 from typing import Tuple

-from super_gradients.training.utils.bbox_formats.bbox_format import (
+from super_gradients.training.datasets.data_formats.bbox_formats.bbox_format import (
     BoundingBoxFormat,
 )
-from super_gradients.training.utils.bbox_formats.cxcywh import cxcywh_to_xyxy, xyxy_to_cxcywh_inplace, cxcywh_to_xyxy_inplace
-from super_gradients.training.utils.bbox_formats.normalized_xyxy import (
+from super_gradients.training.datasets.data_formats.bbox_formats.cxcywh import cxcywh_to_xyxy, xyxy_to_cxcywh_inplace, cxcywh_to_xyxy_inplace
+from super_gradients.training.datasets.data_formats.bbox_formats.normalized_xyxy import (
     xyxy_to_normalized_xyxy_inplace,
     xyxy_to_normalized_xyxy,
     normalized_xyxy_to_xyxy_inplace,
src/super_gradients/training/datasets/data_formats/bbox_formats/normalized_xywh.py

@@ -1,14 +1,14 @@
 from typing import Tuple

-from super_gradients.training.utils.bbox_formats.bbox_format import (
+from super_gradients.training.datasets.data_formats.bbox_formats.bbox_format import (
     BoundingBoxFormat,
 )
-from super_gradients.training.utils.bbox_formats.normalized_xyxy import (
+from super_gradients.training.datasets.data_formats.bbox_formats.normalized_xyxy import (
     normalized_xyxy_to_xyxy_inplace,
     xyxy_to_normalized_xyxy_inplace,
     xyxy_to_normalized_xyxy,
 )
-from super_gradients.training.utils.bbox_formats.xywh import xywh_to_xyxy_inplace, xywh_to_xyxy, xyxy_to_xywh_inplace
+from super_gradients.training.datasets.data_formats.bbox_formats.xywh import xywh_to_xyxy_inplace, xywh_to_xyxy, xyxy_to_xywh_inplace

 __all__ = [
     "xyxy_to_normalized_xywh",
src/super_gradients/training/datasets/data_formats/bbox_formats/normalized_xyxy.py

@@ -5,7 +5,7 @@ import numpy as np
 import torch
 from torch import Tensor

-from super_gradients.training.utils.bbox_formats.bbox_format import (
+from super_gradients.training.datasets.data_formats.bbox_formats.bbox_format import (
     BoundingBoxFormat,
 )

src/super_gradients/training/datasets/data_formats/bbox_formats/xywh.py

@@ -3,7 +3,7 @@ from typing import Tuple
 import numpy as np
 import torch

-from super_gradients.training.utils.bbox_formats.bbox_format import (
+from super_gradients.training.datasets.data_formats.bbox_formats.bbox_format import (
     BoundingBoxFormat,
 )

src/super_gradients/training/datasets/data_formats/bbox_formats/xyxy.py

@@ -1,6 +1,6 @@
 from typing import Tuple

-from super_gradients.training.utils.bbox_formats.bbox_format import (
+from super_gradients.training.datasets.data_formats.bbox_formats.bbox_format import (
     BoundingBoxFormat,
 )

src/super_gradients/training/datasets/data_formats/bbox_formats/yxyx.py

@@ -3,7 +3,7 @@ from typing import Tuple
 import numpy as np
 import torch

-from super_gradients.training.utils.bbox_formats.bbox_format import BoundingBoxFormat
+from super_gradients.training.datasets.data_formats.bbox_formats.bbox_format import BoundingBoxFormat

 __all__ = ["YXYXCoordinateFormat", "xyxy_to_yxyx", "xyxy_to_yxyx_inplace"]

src/super_gradients/training/datasets/data_formats/default_formats.py (new file)

from super_gradients.common.object_names import ConcatenatedTensorFormats
from super_gradients.training.datasets.data_formats.formats import ConcatenatedTensorFormat, BoundingBoxesTensorSliceItem, TensorSliceItem
from super_gradients.training.datasets.data_formats.bbox_formats import (
    XYXYCoordinateFormat,
    XYWHCoordinateFormat,
    CXCYWHCoordinateFormat,
    NormalizedXYXYCoordinateFormat,
    NormalizedXYWHCoordinateFormat,
    NormalizedCXCYWHCoordinateFormat,
)

XYXY_LABEL = ConcatenatedTensorFormat(
    layout=(
        BoundingBoxesTensorSliceItem(name="bboxes", format=XYXYCoordinateFormat()),
        TensorSliceItem(length=1, name="labels"),
    )
)

XYWH_LABEL = ConcatenatedTensorFormat(
    layout=(
        BoundingBoxesTensorSliceItem(name="bboxes", format=XYWHCoordinateFormat()),
        TensorSliceItem(length=1, name="labels"),
    )
)

CXCYWH_LABEL = ConcatenatedTensorFormat(
    layout=(
        BoundingBoxesTensorSliceItem(name="bboxes", format=CXCYWHCoordinateFormat()),
        TensorSliceItem(length=1, name="labels"),
    )
)

LABEL_XYXY = ConcatenatedTensorFormat(
    layout=(
        TensorSliceItem(length=1, name="labels"),
        BoundingBoxesTensorSliceItem(name="bboxes", format=XYXYCoordinateFormat()),
    )
)

LABEL_XYWH = ConcatenatedTensorFormat(
    layout=(
        TensorSliceItem(length=1, name="labels"),
        BoundingBoxesTensorSliceItem(name="bboxes", format=XYWHCoordinateFormat()),
    )
)

LABEL_CXCYWH = ConcatenatedTensorFormat(
    layout=(
        TensorSliceItem(length=1, name="labels"),
        BoundingBoxesTensorSliceItem(name="bboxes", format=CXCYWHCoordinateFormat()),
    )
)

NORMALIZED_XYXY_LABEL = ConcatenatedTensorFormat(
    layout=(
        BoundingBoxesTensorSliceItem(name="bboxes", format=NormalizedXYXYCoordinateFormat()),
        TensorSliceItem(length=1, name="labels"),
    )
)

NORMALIZED_XYWH_LABEL = ConcatenatedTensorFormat(
    layout=(
        BoundingBoxesTensorSliceItem(name="bboxes", format=NormalizedXYWHCoordinateFormat()),
        TensorSliceItem(length=1, name="labels"),
    )
)

NORMALIZED_CXCYWH_LABEL = ConcatenatedTensorFormat(
    layout=(
        BoundingBoxesTensorSliceItem(name="bboxes", format=NormalizedCXCYWHCoordinateFormat()),
        TensorSliceItem(length=1, name="labels"),
    )
)

LABEL_NORMALIZED_XYXY = ConcatenatedTensorFormat(
    layout=(
        TensorSliceItem(length=1, name="labels"),
        BoundingBoxesTensorSliceItem(name="bboxes", format=NormalizedXYXYCoordinateFormat()),
    )
)

LABEL_NORMALIZED_XYWH = ConcatenatedTensorFormat(
    layout=(
        TensorSliceItem(length=1, name="labels"),
        BoundingBoxesTensorSliceItem(name="bboxes", format=NormalizedXYWHCoordinateFormat()),
    )
)

LABEL_NORMALIZED_CXCYWH = ConcatenatedTensorFormat(
    layout=(
        TensorSliceItem(length=1, name="labels"),
        BoundingBoxesTensorSliceItem(name="bboxes", format=NormalizedCXCYWHCoordinateFormat()),
    )
)


def get_default_data_format(format_name: str) -> ConcatenatedTensorFormat:
    return DEFAULT_CONCATENATED_TENSOR_FORMATS[format_name]


DEFAULT_CONCATENATED_TENSOR_FORMATS = {
    ConcatenatedTensorFormats.XYXY_LABEL: XYXY_LABEL,
    ConcatenatedTensorFormats.XYWH_LABEL: XYWH_LABEL,
    ConcatenatedTensorFormats.CXCYWH_LABEL: CXCYWH_LABEL,
    ConcatenatedTensorFormats.LABEL_XYXY: LABEL_XYXY,
    ConcatenatedTensorFormats.LABEL_XYWH: LABEL_XYWH,
    ConcatenatedTensorFormats.LABEL_CXCYWH: LABEL_CXCYWH,
    ConcatenatedTensorFormats.NORMALIZED_XYXY_LABEL: NORMALIZED_XYXY_LABEL,
    ConcatenatedTensorFormats.NORMALIZED_XYWH_LABEL: NORMALIZED_XYWH_LABEL,
    ConcatenatedTensorFormats.NORMALIZED_CXCYWH_LABEL: NORMALIZED_CXCYWH_LABEL,
    ConcatenatedTensorFormats.LABEL_NORMALIZED_XYXY: LABEL_NORMALIZED_XYXY,
    ConcatenatedTensorFormats.LABEL_NORMALIZED_XYWH: LABEL_NORMALIZED_XYWH,
    ConcatenatedTensorFormats.LABEL_NORMALIZED_CXCYWH: LABEL_NORMALIZED_CXCYWH,
}
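Each default format bundles a bbox coordinate format with a one-column label slice, and the resulting object is introspectable. A short sketch using attributes defined in formats.py:

from super_gradients.training.datasets.data_formats.default_formats import get_default_data_format

fmt = get_default_data_format("LABEL_NORMALIZED_CXCYWH")
print(fmt.num_channels)                     # 5: one label column plus four bbox columns
print(fmt.locations["bboxes"])              # (1, 5): bboxes occupy columns 1..4
print(fmt.bboxes_format.format.normalized)  # True for the NORMALIZED_* coordinate formats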
src/super_gradients/training/datasets/data_formats/format_converter.py (new file)

from typing import Tuple, Union

import numpy as np
from torch import Tensor

from super_gradients.training.datasets.data_formats.bbox_formats import convert_bboxes
from super_gradients.training.datasets.data_formats.formats import ConcatenatedTensorFormat, apply_on_bboxes, get_permutation_indexes

__all__ = ["ConcatenatedTensorFormatConverter"]


class ConcatenatedTensorFormatConverter:
    def __init__(
        self,
        input_format: ConcatenatedTensorFormat,
        output_format: ConcatenatedTensorFormat,
        image_shape: Union[Tuple[int, int], None],
    ):
        """
        Converts concatenated tensors from input format to output format.
        Example:
            >>> from super_gradients.training.datasets.data_formats import ConcatenatedTensorFormatConverter
            >>> from super_gradients.training.datasets.data_formats.default_formats import LABEL_CXCYWH, LABEL_NORMALIZED_XYXY
            >>> h, w = 100, 200
            >>> input_target = np.array([[10, 20 / w, 30 / h, 40 / w, 50 / h]], dtype=np.float32)
            >>> expected_output_target = np.array([[10, 30, 40, 20, 20]], dtype=np.float32)
            >>>
            >>> transform = ConcatenatedTensorFormatConverter(input_format=LABEL_NORMALIZED_XYXY, output_format=LABEL_CXCYWH, image_shape=(h, w))
            >>>
            >>> # np.float32 approximation of multiplication/division can lead to uncertainty of up to 1e-7 in precision
            >>> assert np.allclose(transform(input_target), expected_output_target, atol=1e-6)

        :param input_format:  Format definition of the inputs
        :param output_format: Format definition of the outputs
        :param image_shape:   Shape of the input image (rows, cols), used for converting bbox coordinates from/to normalized format.
                              If you're not using normalized coordinates you can set this to None
        """
        self.permutation_indexes = get_permutation_indexes(input_format, output_format)

        self.input_format = input_format
        self.output_format = output_format
        self.image_shape = image_shape
        self.input_length = input_format.num_channels

    def __call__(self, tensor: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
        if tensor.shape[-1] != self.input_length:
            raise RuntimeError(
                f"Number of channels in last dimension of input tensor ({tensor.shape[-1]}) must be "
                f"equal to {self.input_length} as defined by input format."
            )
        tensor = tensor[:, self.permutation_indexes]
        tensor = apply_on_bboxes(fn=self._convert_bbox, tensor=tensor, tensor_format=self.output_format)
        return tensor

    def _convert_bbox(self, bboxes: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
        return convert_bboxes(
            bboxes=bboxes,
            source_format=self.input_format.bboxes_format.format,
            target_format=self.output_format.bboxes_format.format,
            inplace=False,
            image_shape=self.image_shape,
        )
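Beyond the doctest above, the converter also handles layouts with extra named fields, since get_permutation_indexes reorders every slice by name. A sketch with a hypothetical scores slice (the two layouts below are illustrative, not part of the PR's defaults):

import numpy as np

from super_gradients.training.datasets.data_formats import (
    ConcatenatedTensorFormat,
    BoundingBoxesTensorSliceItem,
    TensorSliceItem,
    ConcatenatedTensorFormatConverter,
)
from super_gradients.training.datasets.data_formats.bbox_formats import XYXYCoordinateFormat, CXCYWHCoordinateFormat

# Hypothetical 6-channel layouts: 4 bbox coordinates + score + label.
XYXY_SCORE_LABEL = ConcatenatedTensorFormat(
    layout=(
        BoundingBoxesTensorSliceItem(name="bboxes", format=XYXYCoordinateFormat()),
        TensorSliceItem(length=1, name="scores"),
        TensorSliceItem(length=1, name="labels"),
    )
)
LABEL_SCORE_CXCYWH = ConcatenatedTensorFormat(
    layout=(
        TensorSliceItem(length=1, name="labels"),
        TensorSliceItem(length=1, name="scores"),
        BoundingBoxesTensorSliceItem(name="bboxes", format=CXCYWHCoordinateFormat()),
    )
)

# image_shape=None is fine here: neither format is normalized.
converter = ConcatenatedTensorFormatConverter(input_format=XYXY_SCORE_LABEL, output_format=LABEL_SCORE_CXCYWH, image_shape=None)
targets = np.array([[10, 20, 30, 40, 0.9, 7]], dtype=np.float32)
print(converter(targets))  # [[7, 0.9, 20, 30, 20, 20]]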
src/super_gradients/training/datasets/data_formats/formats.py (new file)

import collections
from typing import Tuple, Union, List, Mapping, Callable

import numpy as np
from torch import Tensor

from super_gradients.training.datasets.data_formats.bbox_formats import BoundingBoxFormat


class DetectionOutputFormat:
    pass


class TensorSliceItem:
    length: int
    name: str

    def __init__(self, name: str, length: int):
        self.name = name
        self.length = length

    def __repr__(self):
        return f"name={self.name} length={self.length}"


class BoundingBoxesTensorSliceItem(TensorSliceItem):
    format: BoundingBoxFormat

    def __init__(self, name: str, format: BoundingBoxFormat):
        super().__init__(name, length=format.get_num_parameters())
        self.format = format

    def __repr__(self):
        return f"name={self.name} length={self.length} format={self.format}"


class ConcatenatedTensorFormat(DetectionOutputFormat):
    """
    Define the output format that return a single tensor of shape [N,M] (N - number of detections,
    M - sum of bbox attributes) that is a concatenated from bbox coordinates and other fields.
    A layout defines the order of concatenated tensors. For instance:
    - layout: (bboxes, scores, labels) gives a Tensor that is product of torch.cat([bboxes, scores, labels], dim=1)
    - layout: (labels, bboxes) produce a Tensor from torch.cat([labels, bboxes], dim=1)
    """

    layout: Mapping[str, TensorSliceItem]
    locations: Mapping[str, Tuple[int, int]]
    indexes: Mapping[str, List[int]]
    num_channels: int

    @property
    def bboxes_format(self) -> BoundingBoxesTensorSliceItem:
        bbox_items = [x for x in self.layout.values() if isinstance(x, BoundingBoxesTensorSliceItem)]
        return bbox_items[0]

    def __init__(self, layout: Union[List[TensorSliceItem], Tuple[TensorSliceItem, ...]]):
        bbox_items = [x for x in layout if isinstance(x, BoundingBoxesTensorSliceItem)]
        if len(bbox_items) != 1:
            raise RuntimeError("Number of bounding box items must be strictly equal to 1")

        _layout = []
        _locations = []
        _indexes = []

        offset = 0
        for item in layout:
            location_indexes = list(range(offset, offset + item.length))
            location_slice = offset, offset + item.length

            _layout.append((item.name, item))
            _locations.append((item.name, location_slice))
            _indexes.append((item.name, location_indexes))
            offset += item.length

        self.layout = collections.OrderedDict(_layout)
        self.locations = collections.OrderedDict(_locations)
        self.indexes = collections.OrderedDict(_indexes)
        self.num_channels = offset

    def __repr__(self):
        return str(self.layout)


def apply_on_bboxes(
    fn: Callable[[Union[np.ndarray, Tensor]], Union[np.ndarray, Tensor]],
    tensor: Union[np.ndarray, Tensor],
    tensor_format: ConcatenatedTensorFormat,
) -> Union[np.ndarray, Tensor]:
    """Apply inplace a function only on the bboxes of a concatenated tensor.

    :param fn:            Function to apply on the bboxes.
    :param tensor:        Concatenated tensor that include - among other - the bboxes.
    :param tensor_format: Format of the tensor, required to know the indexes of the bboxes.
    :return:              Tensor, after applying INPLACE the fn on the bboxes
    """
    return apply_on_layout(fn=fn, tensor=tensor, tensor_format=tensor_format, layout_name=tensor_format.bboxes_format.name)


def apply_on_layout(
    fn: Callable[[Union[np.ndarray, Tensor]], Union[np.ndarray, Tensor]],
    tensor: Union[np.ndarray, Tensor],
    tensor_format: ConcatenatedTensorFormat,
    layout_name: str,
) -> Union[np.ndarray, Tensor]:
    """Apply inplace a function only on a specific layout of a concatenated tensor.

    :param fn:            Function to apply on the bboxes.
    :param tensor:        Concatenated tensor that include - among other - the layout of interest.
    :param tensor_format: Format of the tensor, required to know the indexes of the layout.
    :param layout_name:   Name of the layout of interest. It has to be defined in the tensor_format.
    :return:              Tensor, after applying INPLACE the fn on the layout
    """
    location = slice(*iter(tensor_format.locations[layout_name]))
    result = fn(tensor[..., location])
    tensor[..., location] = result
    return tensor


def filter_on_bboxes(
    fn: Callable[[Union[np.ndarray, Tensor]], Union[np.ndarray, Tensor]],
    tensor: Union[np.ndarray, Tensor],
    tensor_format: ConcatenatedTensorFormat,
) -> Union[np.ndarray, Tensor]:
    """Filter the tensor according to a condition on the bboxes.

    :param fn:            Function to filter the bboxes (keep only True elements).
    :param tensor:        Concatenated tensor that include - among other - the bboxes.
    :param tensor_format: Format of the tensor, required to know the indexes of the bboxes.
    :return:              Tensor, after applying INPLACE the fn on the bboxes
    """
    return filter_on_layout(fn=fn, tensor=tensor, tensor_format=tensor_format, layout_name=tensor_format.bboxes_format.name)


def filter_on_layout(
    fn: Callable[[Union[np.ndarray, Tensor]], Union[np.ndarray, Tensor]],
    tensor: Union[np.ndarray, Tensor],
    tensor_format: ConcatenatedTensorFormat,
    layout_name: str,
) -> Union[np.ndarray, Tensor]:
    """Filter the tensor according to a condition on a specific layout.

    :param fn:            Function to filter the bboxes (keep only True elements).
    :param tensor:        Concatenated tensor that include - among other - the layout of interest.
    :param tensor_format: Format of the tensor, required to know the indexes of the layout.
    :param layout_name:   Name of the layout of interest. It has to be defined in the tensor_format.
    :return:              Tensor, after filtering the bboxes according to fn.
    """
    location = slice(*tensor_format.locations[layout_name])
    mask = fn(tensor[..., location])
    tensor = tensor[mask]
    return tensor


def get_permutation_indexes(input_format: ConcatenatedTensorFormat, output_format: ConcatenatedTensorFormat) -> List[int]:
    """Compute the permutations required to change the format layout order.

    :param input_format:  Input format to transform from
    :param output_format: Output format to transform to
    :return:              Permutation indexes to go from input to output format.
    """
    output_indexes = []
    for output_name, output_spec in output_format.layout.items():
        if output_name not in input_format.layout:
            raise KeyError(f"Requested item '{output_name}' was not found among input format spec. Present items are: {tuple(input_format.layout.keys())}")

        input_spec = input_format.layout[output_name]
        if input_spec.length != output_spec.length:
            raise RuntimeError(
                f"Length of the output must match in input and output format. "
                f"Input spec size is {input_spec.length} for key '{output_name}' and output spec size is {output_spec.length}."
            )
        indexes = input_format.indexes[output_name]
        output_indexes.extend(indexes)
    return output_indexes
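To make the helper semantics concrete, a short sketch exercising filter_on_bboxes and get_permutation_indexes on the default formats (values are illustrative):

import numpy as np

from super_gradients.training.datasets.data_formats.default_formats import XYXY_LABEL, LABEL_XYXY
from super_gradients.training.datasets.data_formats.formats import filter_on_bboxes, get_permutation_indexes

targets = np.array(
    [
        [10, 10, 50, 50, 1],  # 40x40 box, kept by the filter below
        [10, 10, 12, 12, 2],  # 2x2 box, dropped
    ],
    dtype=np.float32,
)

# Keep only rows whose bbox is wider and taller than 5 pixels.
# The callback receives just the bbox columns; for XYXY that means
# width = x2 - x1 and height = y2 - y1.
kept = filter_on_bboxes(
    fn=lambda b: np.minimum(b[:, 2] - b[:, 0], b[:, 3] - b[:, 1]) > 5,
    tensor=targets,
    tensor_format=XYXY_LABEL,
)
print(kept)  # only the first row survives

# Column permutation that moves the trailing label in front of the bbox.
print(get_permutation_indexes(XYXY_LABEL, LABEL_XYXY))  # [4, 0, 1, 2, 3]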
src/super_gradients/training/datasets/data_formats/output_adapters/__init__.py (new file)

from .detection_adapter import DetectionOutputAdapter

__all__ = ["DetectionOutputAdapter"]
src/super_gradients/training/datasets/data_formats/output_adapters/detection_adapter.py

@@ -4,8 +4,8 @@ from typing import Tuple, Union, Callable
 import torch
 from torch import nn, Tensor

-from super_gradients.training.utils.bbox_formats import BoundingBoxFormat
-from super_gradients.training.utils.output_adapters.formats import ConcatenatedTensorFormat
+from super_gradients.training.datasets.data_formats.bbox_formats import BoundingBoxFormat
+from super_gradients.training.datasets.data_formats.formats import ConcatenatedTensorFormat

 __all__ = ["DetectionOutputAdapter"]

@@ -72,8 +72,8 @@ class DetectionOutputAdapter(nn.Module):
     Adapter class for converting model's predictions for object detection to a desired format.
     This adapter supports torch.jit tracing & scripting & onnx conversion.

-    >>> from super_gradients.training.utils.output_adapters.formats import ConcatenatedTensorFormat, BoundingBoxesTensorSliceItem, TensorSliceItem
-    >>> from super_gradients.training.utils.bbox_formats import XYXYCoordinateFormat, NormalizedXYWHCoordinateFormat
+    >>> from super_gradients.training.datasets.data_formats.formats import ConcatenatedTensorFormat, BoundingBoxesTensorSliceItem, TensorSliceItem
+    >>> from super_gradients.training.datasets.data_formats.bbox_formats import XYXYCoordinateFormat, NormalizedXYWHCoordinateFormat
     >>>
     >>> class CustomDetectionHead(nn.Module):
     >>>    num_classes: int = 123
src/super_gradients/training/transforms/all_transforms.py

@@ -22,7 +22,6 @@ from super_gradients.training.transforms.transforms import (
     DetectionMixup,
     DetectionHSV,
     DetectionHorizontalFlip,
-    DetectionTargetsFormat,
     DetectionPaddedRescale,
     DetectionTargetsFormatTransform,
     Standardize,
@@ -82,7 +81,6 @@ TRANSFORMS = {
     Transforms.DetectionHSV: DetectionHSV,
     Transforms.DetectionHorizontalFlip: DetectionHorizontalFlip,
     Transforms.DetectionPaddedRescale: DetectionPaddedRescale,
-    Transforms.DetectionTargetsFormat: DetectionTargetsFormat,
     Transforms.DetectionTargetsFormatTransform: DetectionTargetsFormatTransform,
     Transforms.RandomResizedCropAndInterpolation: RandomResizedCropAndInterpolation,
     Transforms.RandAugmentTransform: rand_augment_transform,
src/super_gradients/training/transforms/transforms.py

@@ -9,7 +9,12 @@ from torchvision import transforms as transforms
 import numpy as np
 import cv2
 from super_gradients.common.abstractions.abstract_logger import get_logger
-from super_gradients.training.utils.detection_utils import get_mosaic_coordinate, adjust_box_anns, xyxy2cxcywh, cxcywh2xyxy, DetectionTargetsFormat
+from super_gradients.common.decorators.factory_decorator import resolve_param
+from super_gradients.common.factories.data_formats_factory import ConcatenatedTensorFormatFactory
+from super_gradients.training.utils.detection_utils import get_mosaic_coordinate, adjust_box_anns, xyxy2cxcywh, cxcywh2xyxy
+from super_gradients.training.datasets.data_formats import ConcatenatedTensorFormatConverter
+from super_gradients.training.datasets.data_formats.formats import filter_on_bboxes, ConcatenatedTensorFormat
+from super_gradients.training.datasets.data_formats.default_formats import XYXY_LABEL, LABEL_CXCYWH

 image_resample = Image.BILINEAR
 mask_resample = Image.NEAREST
@@ -757,88 +762,60 @@ class DetectionTargetsFormatTransform(DetectionTransform):
     """
     Detection targets format transform

-    Converts targets in input_format to output_format.
+    Convert targets in input_format to output_format, filter small bboxes and pad targets.
     Attributes:
-        input_format: DetectionTargetsFormat: input target format
-        output_format: DetectionTargetsFormat: output target format
-        min_bbox_edge_size: int: bboxes with edge size lower then this values will be removed.
-        max_targets: int: max objects in single image, padding target to this size.
+        image_shape:        Shape of the images to transform.
+        input_format:       Format of the input targets. For instance [xmin, ymin, xmax, ymax, cls_id] refers to XYXY_LABEL
+        output_format:      Format of the output targets. For instance [xmin, ymin, xmax, ymax, cls_id] refers to XYXY_LABEL
+        min_bbox_edge_size: bboxes with edge size lower then this values will be removed.
+        max_targets:        Max objects in single image, padding target to this size.
     """

+    @resolve_param("input_format", ConcatenatedTensorFormatFactory())
+    @resolve_param("output_format", ConcatenatedTensorFormatFactory())
     def __init__(
         self,
-        input_format: DetectionTargetsFormat = DetectionTargetsFormat.XYXY_LABEL,
-        output_format: DetectionTargetsFormat = DetectionTargetsFormat.LABEL_CXCYWH,
+        image_shape: tuple,
+        input_format: ConcatenatedTensorFormat = XYXY_LABEL,
+        output_format: ConcatenatedTensorFormat = LABEL_CXCYWH,
         min_bbox_edge_size: float = 1,
         max_targets: int = 120,
     ):
         super(DetectionTargetsFormatTransform, self).__init__()
         self.input_format = input_format
         self.output_format = output_format
-        self.min_bbox_edge_size = min_bbox_edge_size
         self.max_targets = max_targets
+        self.min_bbox_edge_size = min_bbox_edge_size / max(image_shape) if output_format.bboxes_format.format.normalized else min_bbox_edge_size
+        self.targets_format_converter = ConcatenatedTensorFormatConverter(input_format=input_format, output_format=output_format, image_shape=image_shape)

-    def __call__(self, sample):
-        normalized_input = "NORMALIZED" in self.input_format.value
-        normalized_output = "NORMALIZED" in self.output_format.value
-        normalize = not normalized_input and normalized_output
-        denormalize = normalized_input and not normalized_output
-
-        label_first_in_input = self.input_format.value.split("_")[0] == "LABEL"
-        label_first_in_output = self.output_format.value.split("_")[0] == "LABEL"
-
-        input_xyxy_format = "XYXY" in self.input_format.value
-        output_xyxy_format = "XYXY" in self.output_format.value
-        convert2xyxy = not input_xyxy_format and output_xyxy_format
-        convert2cxcy = input_xyxy_format and not output_xyxy_format
-
-        image, targets, crowd_targets = sample["image"], sample["target"], sample.get("crowd_target")
-
-        _, h, w = image.shape
-
-        def _format_target(targets_in):
-            if label_first_in_input:
-                labels, boxes = targets_in[:, 0], targets_in[:, 1:]
-            else:
-                boxes, labels = targets_in[:, :4], targets_in[:, 4]
-
-            if convert2cxcy:
-                boxes = xyxy2cxcywh(boxes)
-            elif convert2xyxy:
-                boxes = cxcywh2xyxy(boxes)
-
-            if normalize:
-                boxes[:, 0] = boxes[:, 0] / w
-                boxes[:, 1] = boxes[:, 1] / h
-                boxes[:, 2] = boxes[:, 2] / w
-                boxes[:, 3] = boxes[:, 3] / h
-
-            elif denormalize:
-                boxes[:, 0] = boxes[:, 0] * w
-                boxes[:, 1] = boxes[:, 1] * h
-                boxes[:, 2] = boxes[:, 2] * w
-                boxes[:, 3] = boxes[:, 3] * h
-
-            min_bbox_edge_size = self.min_bbox_edge_size / max(w, h) if normalized_output else self.min_bbox_edge_size
+    def __call__(self, sample: dict) -> dict:
+        sample["target"] = self.apply_on_targets(sample["target"])
+        if "crowd_target" in sample.keys():
+            sample["crowd_target"] = self.apply_on_targets(sample["crowd_target"])
+        return sample

-            cxcywh_boxes = boxes if not output_xyxy_format else xyxy2cxcywh(boxes.copy())
+    def apply_on_targets(self, targets: np.ndarray) -> np.ndarray:
+        """Convert targets in input_format to output_format, filter small bboxes and pad targets"""
+        targets = self.targets_format_converter(targets)
+        targets = self.filter_small_bboxes(targets)
+        targets = self.pad_targets(targets)
+        return targets

-            mask_b = np.minimum(cxcywh_boxes[:, 2], cxcywh_boxes[:, 3]) > min_bbox_edge_size
-            boxes_t = boxes[mask_b]
-            labels_t = labels[mask_b]
+    def filter_small_bboxes(self, targets: np.ndarray) -> np.ndarray:
+        """Filter bboxes smaller than specified threshold."""

-            labels_t = np.expand_dims(labels_t, 1)
-            targets_t = np.hstack((labels_t, boxes_t)) if label_first_in_output else np.hstack((boxes_t, labels_t))
-            padded_targets = np.zeros((self.max_targets, 5))
-            padded_targets[range(len(targets_t))[: self.max_targets]] = targets_t[: self.max_targets]
-            padded_targets = np.ascontiguousarray(padded_targets, dtype=np.float32)
+        def _is_big_enough(bboxes: np.ndarray) -> np.ndarray:
+            return np.minimum(bboxes[:, 2], bboxes[:, 3]) > self.min_bbox_edge_size

-            return padded_targets
+        targets = filter_on_bboxes(fn=_is_big_enough, tensor=targets, tensor_format=self.output_format)
+        return targets

-        sample["target"] = _format_target(targets)
-        if crowd_targets is not None:
-            sample["crowd_target"] = _format_target(crowd_targets)
-        return sample
+    def pad_targets(self, targets: np.ndarray) -> np.ndarray:
+        """Pad targets."""
+        padded_targets = np.zeros((self.max_targets, targets.shape[-1]))
+        padded_targets[range(len(targets))[: self.max_targets]] = targets[: self.max_targets]
+        padded_targets = np.ascontiguousarray(padded_targets, dtype=np.float32)
+        return padded_targets


 def get_aug_params(value: Union[tuple, float], center: float = 0):
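Taken together, the rewritten transform now delegates format conversion to ConcatenatedTensorFormatConverter and then filters and pads. A brief usage sketch consistent with the updated unit tests below:

import numpy as np

from super_gradients.training.transforms.transforms import DetectionTargetsFormatTransform
from super_gradients.training.datasets.data_formats.default_formats import XYXY_LABEL, LABEL_CXCYWH

image = np.zeros((3, 100, 200), dtype=np.uint8)  # illustrative CHW image
sample = {"image": image, "target": np.array([[10, 20, 30, 40, 7]], dtype=np.float32)}

transform = DetectionTargetsFormatTransform(
    image_shape=image.shape[1:],  # (h, w), as in the tests
    max_targets=1,
    input_format=XYXY_LABEL,
    output_format=LABEL_CXCYWH,
)
print(transform(sample)["target"])  # [[7, 20, 30, 20, 20]]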
src/super_gradients/training/utils/output_adapters/__init__.py (deleted file)

from .formats import ConcatenatedTensorFormat, BoundingBoxesTensorSliceItem, TensorSliceItem
from .detection_adapter import DetectionOutputAdapter

__all__ = ["DetectionOutputAdapter", "TensorSliceItem", "ConcatenatedTensorFormat", "BoundingBoxesTensorSliceItem"]
src/super_gradients/training/utils/output_adapters/formats.py (deleted file; its classes move to datasets/data_formats/formats.py above)

import collections
from typing import Tuple, Union, List, Mapping

from super_gradients.training.utils.bbox_formats import BoundingBoxFormat


class DetectionOutputFormat:
    pass


class TensorSliceItem:
    length: int
    name: str

    def __init__(self, name: str, length: int):
        self.name = name
        self.length = length

    def __repr__(self):
        return f"name={self.name} length={self.length}"


class BoundingBoxesTensorSliceItem(TensorSliceItem):
    format: BoundingBoxFormat

    def __init__(self, name: str, format: BoundingBoxFormat):
        super().__init__(name, length=format.get_num_parameters())
        self.format = format

    def __repr__(self):
        return f"name={self.name} length={self.length} format={self.format}"


class ConcatenatedTensorFormat(DetectionOutputFormat):
    """
    Define the output format that return a single tensor of shape [N,M] (N - number of detections,
    M - sum of bbox attributes) that is a concatenated from bbox coordinates and other fields.
    A layout defines the order of concatenated tensors. For instance:
    - layout: (bboxes, scores, labels) gives a Tensor that is product of torch.cat([bboxes, scores, labels], dim=1)
    - layout: (labels, bboxes) produce a Tensor from torch.cat([labels, bboxes], dim=1)
    """

    layout: Mapping[str, TensorSliceItem]
    locations: Mapping[str, Tuple[int, int]]
    indexes: Mapping[str, List[int]]
    num_channels: int

    @property
    def bboxes_format(self) -> BoundingBoxesTensorSliceItem:
        bbox_items = [x for x in self.layout.values() if isinstance(x, BoundingBoxesTensorSliceItem)]
        return bbox_items[0]

    def __init__(self, layout: Union[List[TensorSliceItem], Tuple[TensorSliceItem, ...]]):
        bbox_items = [x for x in layout if isinstance(x, BoundingBoxesTensorSliceItem)]
        if len(bbox_items) != 1:
            raise RuntimeError("Number of bounding box items must be strictly equal to 1")

        _layout = []
        _locations = []
        _indexes = []

        offset = 0
        for item in layout:
            location_indexes = list(range(offset, offset + item.length))
            location_slice = offset, offset + item.length

            _layout.append((item.name, item))
            _locations.append((item.name, location_slice))
            _indexes.append((item.name, location_indexes))
            offset += item.length

        self.layout = collections.OrderedDict(_layout)
        self.locations = collections.OrderedDict(_locations)
        self.indexes = collections.OrderedDict(_indexes)
        self.num_channels = offset

    def __repr__(self):
        return str(self.layout)
tests/unit_tests/bbox_formats_test.py

@@ -6,8 +6,9 @@ import unittest
 import numpy as np
 import torch

+
 from super_gradients.common.factories.bbox_format_factory import BBoxFormatFactory
-from super_gradients.training.utils.bbox_formats import (
+from super_gradients.training.datasets.data_formats.bbox_formats import (
     CXCYWHCoordinateFormat,
     NormalizedXYXYCoordinateFormat,
     NormalizedXYWHCoordinateFormat,
@@ -19,21 +20,21 @@ from super_gradients.training.utils.bbox_formats import (
     BBOX_FORMATS,
     BoundingBoxFormat,
 )
-from super_gradients.training.utils.bbox_formats.normalized_cxcywh import (
+from super_gradients.training.datasets.data_formats.bbox_formats.normalized_cxcywh import (
     normalized_cxcywh_to_xyxy_inplace,
     xyxy_to_normalized_cxcywh_inplace,
     xyxy_to_normalized_cxcywh,
     normalized_cxcywh_to_xyxy,
 )
-from super_gradients.training.utils.bbox_formats.normalized_xywh import (
+from super_gradients.training.datasets.data_formats.bbox_formats.normalized_xywh import (
     xyxy_to_normalized_xywh_inplace,
     xyxy_to_normalized_xywh,
     normalized_xywh_to_xyxy_inplace,
     normalized_xywh_to_xyxy,
 )
-from super_gradients.training.utils.bbox_formats.xywh import xyxy_to_xywh, xywh_to_xyxy, xywh_to_xyxy_inplace, xyxy_to_xywh_inplace
-from super_gradients.training.utils.bbox_formats.yxyx import xyxy_to_yxyx, xyxy_to_yxyx_inplace
-from super_gradients.training.utils.output_adapters.detection_adapter import ConvertBoundingBoxes
+from super_gradients.training.datasets.data_formats.bbox_formats.xywh import xyxy_to_xywh, xywh_to_xyxy, xywh_to_xyxy_inplace, xyxy_to_xywh_inplace
+from super_gradients.training.datasets.data_formats.bbox_formats.yxyx import xyxy_to_yxyx, xyxy_to_yxyx_inplace
+from super_gradients.training.datasets.data_formats.output_adapters.detection_adapter import ConvertBoundingBoxes


 class BBoxFormatsTest(unittest.TestCase):
@@ -255,7 +256,7 @@ class BBoxFormatsTest(unittest.TestCase):
                 with tempfile.TemporaryDirectory() as tmpdirname:
                     adapter_fname = os.path.join(tmpdirname, "adapter.onnx")
                     # Just test that export works, we test the correctness in the detection_output_adapter_test.py
-                    torch.onnx.export(module, gt_bboxes.clone(), adapter_fname)
+                    torch.onnx.export(module, gt_bboxes.clone(), adapter_fname, opset_version=11)


 if __name__ == "__main__":
tests/unit_tests/detection_output_adapter_test.py

@@ -7,8 +7,13 @@ import onnx
 import onnxruntime as ort
 import torch.jit

-from super_gradients.training.utils.bbox_formats import NormalizedXYWHCoordinateFormat, CXCYWHCoordinateFormat, YXYXCoordinateFormat
-from super_gradients.training.utils.output_adapters import DetectionOutputAdapter, ConcatenatedTensorFormat, BoundingBoxesTensorSliceItem, TensorSliceItem
+from super_gradients.training.datasets.data_formats.bbox_formats import NormalizedXYWHCoordinateFormat, CXCYWHCoordinateFormat, YXYXCoordinateFormat
+from super_gradients.training.datasets.data_formats.output_adapters.detection_adapter import DetectionOutputAdapter
+from super_gradients.training.datasets.data_formats import (
+    ConcatenatedTensorFormat,
+    BoundingBoxesTensorSliceItem,
+    TensorSliceItem,
+)

 NORMALIZED_XYWH_SCORES_LABELS = ConcatenatedTensorFormat(
     layout=(
@@ -119,7 +124,7 @@ class TestDetectionOutputAdapter(unittest.TestCase):

             with tempfile.TemporaryDirectory() as tmpdirname:
                 adapter_fname = os.path.join(tmpdirname, "adapter.onnx")
-                torch.onnx.export(adapter, inp, f=adapter_fname, input_names=["predictions"], output_names=["output_predictions"])
+                torch.onnx.export(adapter, inp, f=adapter_fname, input_names=["predictions"], output_names=["output_predictions"], opset_version=11)

                 onnx_model = onnx.load(adapter_fname)
                 onnx.checker.check_model(onnx_model)
tests/unit_tests/detection_targets_format_transform_test.py

@@ -2,7 +2,14 @@ import numpy as np
 import unittest

 from super_gradients.training.transforms.transforms import DetectionTargetsFormatTransform
-from super_gradients.training.utils.detection_utils import DetectionTargetsFormat
+
+from super_gradients.training.datasets.data_formats.default_formats import (
+    XYXY_LABEL,
+    LABEL_XYXY,
+    LABEL_CXCYWH,
+    LABEL_NORMALIZED_XYXY,
+    LABEL_NORMALIZED_CXCYWH,
+)


 class DetectionTargetsTransformTest(unittest.TestCase):
@@ -12,110 +19,115 @@ class DetectionTargetsTransformTest(unittest.TestCase):
     def test_label_first_2_label_last(self):
         input = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
         output = np.array([[50, 10, 20, 30, 40]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.XYXY_LABEL,
-                                                    output_format=DetectionTargetsFormat.LABEL_XYXY)
         sample = {"image": self.image, "target": input}
-        self.assertTrue(np.array_equal(transform(sample)["target"], output))
+
+        transform = DetectionTargetsFormatTransform(image_shape=self.image.shape[1:], max_targets=1, input_format=XYXY_LABEL, output_format=LABEL_XYXY)
+        t_output = transform(sample)["target"]
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_xyxy_2_normalized_xyxy(self):
         input = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
         _, h, w = self.image.shape
         output = np.array([[10, 20 / w, 30 / h, 40 / w, 50 / h]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_XYXY,
-                                                    output_format=DetectionTargetsFormat.LABEL_NORMALIZED_XYXY)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(
+            image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_XYXY, output_format=LABEL_NORMALIZED_XYXY
+        )
         t_output = transform(sample)["target"]
-        self.assertTrue(np.array_equal(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_xyxy_2_cxcywh(self):
         input = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
         _, h, w = self.image.shape
         output = np.array([[10, 30, 40, 20, 20]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_XYXY,
-                                                    output_format=DetectionTargetsFormat.LABEL_CXCYWH)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_XYXY, output_format=LABEL_CXCYWH)
         t_output = transform(sample)["target"]
-        self.assertTrue(np.array_equal(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_xyxy_2_normalized_cxcywh(self):
         input = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
         _, h, w = self.image.shape
         output = np.array([[10, 30 / w, 40 / h, 20 / w, 20 / h]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_XYXY,
-                                                    output_format=DetectionTargetsFormat.LABEL_NORMALIZED_CXCYWH)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(
+            image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_XYXY, output_format=LABEL_NORMALIZED_CXCYWH
+        )
         t_output = transform(sample)["target"]
-        self.assertTrue(np.array_equal(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_normalized_xyxy_2_cxcywh(self):
         _, h, w = self.image.shape
         input = np.array([[10, 20 / w, 30 / h, 40 / w, 50 / h]], dtype=np.float32)
         output = np.array([[10, 30, 40, 20, 20]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_NORMALIZED_XYXY,
-                                                    output_format=DetectionTargetsFormat.LABEL_CXCYWH)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(
+            image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_NORMALIZED_XYXY, output_format=LABEL_CXCYWH
+        )
         t_output = transform(sample)["target"]
-        self.assertTrue(np.allclose(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_normalized_xyxy_2_normalized_cxcywh(self):
         _, h, w = self.image.shape
         input = np.array([[10, 20 / w, 30 / h, 40 / w, 50 / h]], dtype=np.float32)
         output = np.array([[10, 30 / w, 40 / h, 20 / w, 20 / h]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_NORMALIZED_XYXY,
-                                                    output_format=DetectionTargetsFormat.LABEL_NORMALIZED_CXCYWH)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(
+            image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_NORMALIZED_XYXY, output_format=LABEL_NORMALIZED_CXCYWH
+        )
         t_output = transform(sample)["target"]
-        self.assertTrue(np.allclose(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_cxcywh_2_xyxy(self):
         output = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
         input = np.array([[10, 30, 40, 20, 20]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_CXCYWH,
-                                                    output_format=DetectionTargetsFormat.LABEL_XYXY)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_CXCYWH, output_format=LABEL_XYXY)
         t_output = transform(sample)["target"]
-        self.assertTrue(np.array_equal(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_cxcywh_2_normalized_xyxy(self):
         _, h, w = self.image.shape
         output = np.array([[10, 20 / w, 30 / h, 40 / w, 50 / h]], dtype=np.float32)
         input = np.array([[10, 30, 40, 20, 20]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_CXCYWH,
-                                                    output_format=DetectionTargetsFormat.LABEL_NORMALIZED_XYXY)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(
+            image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_CXCYWH, output_format=LABEL_NORMALIZED_XYXY
+        )
         t_output = transform(sample)["target"]
-        self.assertTrue(np.array_equal(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_normalized_cxcywh_2_xyxy(self):
         _, h, w = self.image.shape
         input = np.array([[10, 30 / w, 40 / h, 20 / w, 20 / h]], dtype=np.float32)
         output = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_NORMALIZED_CXCYWH,
-                                                    output_format=DetectionTargetsFormat.LABEL_XYXY)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(
+            image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_NORMALIZED_CXCYWH, output_format=LABEL_XYXY
+        )
         t_output = transform(sample)["target"]
-        self.assertTrue(np.allclose(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))

     def test_normalized_cxcywh_2_normalized_xyxy(self):
         _, h, w = self.image.shape
         output = np.array([[10, 20 / w, 30 / h, 40 / w, 50 / h]], dtype=np.float32)
         input = np.array([[10, 30 / w, 40 / h, 20 / w, 20 / h]], dtype=np.float32)
-        transform = DetectionTargetsFormatTransform(max_targets=1,
-                                                    input_format=DetectionTargetsFormat.LABEL_NORMALIZED_CXCYWH,
-                                                    output_format=DetectionTargetsFormat.LABEL_NORMALIZED_XYXY)
         sample = {"image": self.image, "target": input}
+
+        transform = DetectionTargetsFormatTransform(
+            image_shape=self.image.shape[1:], max_targets=1, input_format=LABEL_NORMALIZED_CXCYWH, output_format=LABEL_NORMALIZED_XYXY
+        )
         t_output = transform(sample)["target"]
-        self.assertTrue(np.allclose(output, t_output))
+        self.assertTrue(np.allclose(output, t_output, atol=1e-6))


-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()