from super_gradients.common.object_names import Models
from super_gradients.training import models

# Note that currently only YoloX and PPYoloE are supported.
# Build a PP-YoloE-S model using the "coco" pretrained weights.
model = models.get(Models.PP_YOLOE_S, pretrained_weights="coco")

# Example inputs: the images are referenced by URL and handed to `predict` as a list.
IMAGES = [
    "https://miro.medium.com/v2/resize:fit:500/0*w1s81z-Q72obhE_z",
    "https://s.hs-data.com/bilder/spieler/gross/128069.jpg",
    "https://datasets-server.huggingface.co/assets/Chris1/cityscapes/--/Chris1--cityscapes/train/28/image/image.jpg",
]
# `iou` and `conf` are the thresholds given to the model's post-prediction (NMS) callback.
prediction = model.predict(IMAGES, iou=0.65, conf=0.5)
# Display the predictions drawn on each image.
prediction.show()

          
@@ -1,4 +1,4 @@
 
                            -from typing import Union
                
 
                            +from typing import Union, Optional, List
                
 
                             from torch import Tensor
                
@@ -11,6 +11,10 @@ from super_gradients.training.models.detection_models.pp_yolo_e.pan import Custo
 
                             from super_gradients.training.models.detection_models.pp_yolo_e.pp_yolo_head import PPYOLOEHead
                
 
                             from super_gradients.training.utils import HpmStruct
                
 
                             from super_gradients.training.models.arch_params_factory import get_arch_params
                
 
                            +from super_gradients.training.models.detection_models.pp_yolo_e.post_prediction_callback import PPYoloEPostPredictionCallback, DetectionPostPredictionCallback
                
 
                            +from super_gradients.training.models.results import DetectionResults
                
 
                            +from super_gradients.training.pipelines.pipelines import DetectionPipeline
                
 
                            +from super_gradients.training.transforms.processing import Processing
                
 
                             class PPYoloE(SgModule):
                
@@ -23,6 +27,37 @@ class PPYoloE(SgModule):
 
                                     self.neck = CustomCSPPAN(**arch_params["neck"], depth_mult=arch_params["depth_mult"], width_mult=arch_params["width_mult"])
                
 
                                     self.head = PPYOLOEHead(**arch_params["head"], width_mult=arch_params["width_mult"], num_classes=arch_params["num_classes"])
                
 
                            +        self._class_names: Optional[List[str]] = None
                
 
                            +        self._image_processor: Optional[Processing] = None
                
 
                            +
                
 
                            +    @staticmethod
                
 
                            +    def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
                
 
                            +        return PPYoloEPostPredictionCallback(score_threshold=conf, nms_threshold=iou, nms_top_k=1000, max_predictions=300)
                
 
                            +
                
 
                            +    def set_dataset_processing_params(self, class_names: Optional[List[str]], image_processor: Optional[Processing]) -> None:
                
 
                            +        """Set the processing parameters for the dataset.
                
 
                            +
                
 
                            +        :param class_names:     (Optional) Names of the dataset the model was trained on.
                
 
                            +        :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
                
 
                            +        """
                
 
                            +        self._class_names = class_names or self._class_names
                
 
                            +        self._image_processor = image_processor or self._image_processor
                
 
                            +
                
 
                            +    def predict(self, images, iou: float = 0.65, conf: float = 0.01) -> DetectionResults:
                
 
                            +
                
 
                            +        if self._class_names is None or self._image_processor is None:
                
 
                            +            raise RuntimeError(
                
 
                            +                "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
                
 
                            +            )
                
 
                            +
                
 
                            +        pipeline = DetectionPipeline(
                
 
                            +            model=self,
                
 
                            +            image_processor=self._image_processor,
                
 
                            +            post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
                
 
                            +            class_names=self._class_names,
                
 
                            +        )
                
 
                            +        return pipeline(images)
                
 
                            +
                
 
                                 def forward(self, x: Tensor):
                
 
                                     features = self.backbone(x)
                
 
                                     features = self.neck(features)
                
@@ -1,5 +1,5 @@
 
                             import math
                
 
                            -from typing import Union, Type, List, Tuple
                
 
                            +from typing import Union, Type, List, Tuple, Optional
                
 
                             import torch
                
 
                             import torch.nn as nn
                
@@ -11,6 +11,10 @@ from super_gradients.training.models.sg_module import SgModule
 
                             from super_gradients.training.utils import torch_version_is_greater_or_equal
                
 
                             from super_gradients.training.utils.detection_utils import non_max_suppression, matrix_non_max_suppression, NMS_Type, DetectionPostPredictionCallback, Anchors
                
 
                             from super_gradients.training.utils.utils import HpmStruct, check_img_size_divisibility, get_param
                
 
                            +from super_gradients.training.models.results import DetectionResults
                
 
                            +from super_gradients.training.pipelines.pipelines import DetectionPipeline
                
 
                            +from super_gradients.training.transforms.processing import Processing
                
 
                            +
                
 
                             COCO_DETECTION_80_CLASSES_BBOX_ANCHORS = Anchors(
                
 
                                 [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], strides=[8, 16, 32]
                
@@ -80,6 +84,11 @@ class YoloPostPredictionCallback(DetectionPostPredictionCallback):
 
                                     self.with_confidence = with_confidence
                
 
                                 def forward(self, x, device: str = None):
                
 
                            +        """Apply NMS to the raw output of the model and keep only top `max_predictions` results.
                
 
                            +
                
 
                            +        :param x: Raw output of the model, with x[0] expected to be a list of Tensors of shape (cx, cy, w, h, confidence, cls0, cls1, ...)
                
 
                            +        :return: List of Tensors of shape (x1, y1, x2, y2, conf, cls)
                
 
                            +        """
                
 
                                     if self.nms_type == NMS_Type.ITERATIVE:
                
 
                                         nms_result = non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, with_confidence=self.with_confidence)
                
@@ -90,7 +99,6 @@ class YoloPostPredictionCallback(DetectionPostPredictionCallback):
 
                                 def _filter_max_predictions(self, res: List) -> List:
                
 
                                     res[:] = [im[: self.max_pred] if (im is not None and im.shape[0] > self.max_pred) else im for im in res]
                
 
                            -
                
 
                                     return res
                
@@ -408,6 +416,36 @@ class YoloBase(SgModule):
 
                                         self._head = YoloHead(self.arch_params)
                
 
                                         self._initialize_module()
                
 
                            +        self._class_names: Optional[List[str]] = None
                
 
                            +        self._image_processor: Optional[Processing] = None
                
 
                            +
                
 
                            +    @staticmethod
                
 
                            +    def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
                
 
                            +        return YoloPostPredictionCallback(conf=conf, iou=iou)
                
 
                            +
                
 
                            +    def set_dataset_processing_params(self, class_names: Optional[List[str]], image_processor: Optional[Processing]) -> None:
                
 
                            +        """Set the processing parameters for the dataset.
                
 
                            +
                
 
                            +        :param class_names:     (Optional) Names of the dataset the model was trained on.
                
 
                            +        :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
                
 
                            +        """
                
 
                            +        self._class_names = class_names or self._class_names
                
 
                            +        self._image_processor = image_processor or self._image_processor
                
 
                            +
                
 
                            +    def predict(self, images, iou: float = 0.65, conf: float = 0.01) -> DetectionResults:
                
 
                            +        if self._class_names is None or self._image_processor is None:
                
 
                            +            raise RuntimeError(
                
 
                            +                "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
                
 
                            +            )
                
 
                            +
                
 
                            +        pipeline = DetectionPipeline(
                
 
                            +            model=self,
                
 
                            +            image_processor=self._image_processor,
                
 
                            +            post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
                
 
                            +            class_names=self._class_names,
                
 
                            +        )
                
 
                            +        return pipeline(images)
                
 
                            +
                
 
                                 def forward(self, x):
                
 
                                     out = self._backbone(x)
                
 
                                     out = self._head(out)
                
@@ -429,9 +467,7 @@ class YoloBase(SgModule):
 
                                     self._initialize_biases()
                
 
                                     self._initialize_weights()
                
 
                                     if self.arch_params.add_nms:
                
 
                            -            nms_conf = self.arch_params.nms_conf
                
 
                            -            nms_iou = self.arch_params.nms_iou
                
 
                            -            self._nms = YoloPostPredictionCallback(nms_conf, nms_iou)
                
 
                            +            self._nms = self.get_post_prediction_callback(conf=self.arch_params.nms_conf, iou=self.arch_params.nms_iou)
                
 
                                 def _check_strides(self):
                
 
                                     m = self._head._modules_list[-1]  # DetectX()
                
@@ -20,6 +20,7 @@ from super_gradients.training.utils.checkpoint_utils import (
 
                             )
                
 
                             from super_gradients.common.abstractions.abstract_logger import get_logger
                
 
                             from super_gradients.training.utils.sg_trainer_utils import get_callable_param_names
                
 
                            +from super_gradients.training.transforms.processing import get_pretrained_processing_params
                
 
                             logger = get_logger(__name__)
                
@@ -135,6 +136,9 @@ def instantiate_model(
 
                                             net.replace_head(new_num_classes=num_classes_new_head)
                
 
                                             arch_params.num_classes = num_classes_new_head
                
 
                            +            class_names, image_processor = get_pretrained_processing_params(model_name, pretrained_weights)
                
 
                            +            net.set_dataset_processing_params(class_names, image_processor)
                
 
                            +
                
 
                                 _add_model_name_attribute(net, model_name)
                
 
                                 return net
                
@@ -29,13 +29,27 @@ class DetectionPrediction(Prediction):
 
                                     :param labels:      Labels for each bounding box.
                
 
                                     :param image_shape: Shape of the image the prediction is made on, (H, W). This is used to convert bboxes to xyxy format
                
 
                                     """
                
 
                            +        self._validate_input(bboxes, confidence, labels)
                
 
                            +
                
 
                                     factory = BBoxFormatFactory()
                
 
                            -        self.bboxes_xyxy = convert_bboxes(
                
 
                            +        bboxes_xyxy = convert_bboxes(
                
 
                                         bboxes=bboxes,
                
 
                                         image_shape=image_shape,
                
 
                                         source_format=factory.get(bbox_format),
                
 
                                         target_format=factory.get("xyxy"),
                
 
                                         inplace=False,
                
 
                                     )
                
 
                            +
                
 
                            +        self.bboxes_xyxy = bboxes_xyxy
                
 
                                     self.confidence = confidence
                
 
                                     self.labels = labels
                
 
                            +
                
 
                            +    def _validate_input(self, bboxes: np.ndarray, confidence: np.ndarray, labels: np.ndarray) -> None:
                
 
                            +        n_bboxes, n_confidences, n_labels = bboxes.shape[0], confidence.shape[0], labels.shape[0]
                
 
                            +        if n_bboxes != n_confidences != n_labels:
                
 
                            +            raise ValueError(
                
 
                            +                f"The number of bounding boxes ({n_bboxes}) does not match the number of confidence scores ({n_confidences}) and labels ({n_labels})."
                
 
                            +            )
                
 
                            +
                
 
                            +    def __len__(self):
                
 
                            +        return len(self.bboxes_xyxy)
                
 
            from abc import ABC, abstractmethod
from typing import List, Optional, Tuple
from dataclasses import dataclass
from matplotlib import pyplot as plt

import numpy as np

from super_gradients.training.utils.detection_utils import DetectionVisualization
from super_gradients.training.models.predictions import Prediction, DetectionPrediction


@dataclass
class Result(ABC):
    """Abstract container pairing one input image with the model predictions made on it.

    :attr image:        Input image
    :attr predictions:  Predictions of the model
    :attr class_names:  List of the class names to predict
    """

    image: np.ndarray
    predictions: Prediction
    class_names: List[str]

    @abstractmethod
    def draw(self) -> np.ndarray:
        """Draw the predictions on the image.

        :return: The image with the predictions drawn on it.
        """

    @abstractmethod
    def show(self) -> None:
        """Display the predictions on the image."""


@dataclass
class Results(ABC):
    """Abstract container holding the results of a run, one `Result` per entry.

    :attr results: List of results of the run
    """

    results: List[Result]

    @abstractmethod
    def draw(self) -> List[np.ndarray]:
        """Draw the predictions on the image.

        :return: List of drawn images.
        """

    @abstractmethod
    def show(self) -> None:
        """Display the predictions on the image."""


@dataclass
class DetectionResult(Result):
    """Detection output for a single image: the image, its predicted boxes and the class names.

    :attr image:        Input image
    :attr predictions:  Predictions of the model
    :attr class_names:  List of the class names to predict
    """

    image: np.ndarray
    predictions: DetectionPrediction
    class_names: List[str]

    def draw(self, box_thickness: int = 2, show_confidence: bool = True, color_mapping: Optional[List[Tuple[int]]] = None) -> np.ndarray:
        """Return a copy of the image with every predicted bounding box drawn on it.

        :param box_thickness:   Thickness of bounding boxes.
        :param show_confidence: Whether to show confidence scores on the image.
        :param color_mapping:   List of tuples representing the colors for each class.
                                When None (or empty), a mapping is generated from the number of class names.
        :return:                Image with predicted bboxes. The stored image is copied first, so it is not modified.
        """
        canvas = self.image.copy()
        if not color_mapping:
            color_mapping = DetectionVisualization._generate_color_mapping(len(self.class_names))

        detections = self.predictions
        for idx in range(len(detections)):
            # Box coordinates are cast to int before being handed to the drawing helper.
            x1, y1, x2, y2 = (int(detections.bboxes_xyxy[idx, col]) for col in range(4))
            class_id = int(detections.labels[idx])
            score = detections.confidence[idx] if show_confidence else None
            canvas = DetectionVisualization._draw_box_title(
                color_mapping=color_mapping,
                class_names=self.class_names,
                box_thickness=box_thickness,
                image_np=canvas,
                x1=x1,
                y1=y1,
                x2=x2,
                y2=y2,
                class_id=class_id,
                pred_conf=score,
            )
        return canvas

    def show(self, box_thickness: int = 2, show_confidence: bool = True, color_mapping: Optional[List[Tuple[int]]] = None) -> None:
        """Draw the predicted bboxes and display the resulting image with matplotlib.

        :param box_thickness:   Thickness of bounding boxes.
        :param show_confidence: Whether to show confidence scores on the image.
        :param color_mapping:   List of tuples representing the colors for each class.
                                When None (or empty), a mapping is generated from the number of class names.
        """
        drawn = self.draw(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping)

        plt.imshow(drawn, interpolation="nearest")
        plt.axis("off")
        plt.show()


@dataclass
class DetectionResults(Results):
    """Collection of per-image detection results.

    :attr results:  List of the predictions results
    """

    def __init__(self, images: List[np.ndarray], predictions: List[DetectionPrediction], class_names: List[str]):
        # Images and predictions are paired positionally; every result shares the same class names.
        self.results: List[DetectionResult] = [
            DetectionResult(image=image, predictions=prediction, class_names=class_names) for image, prediction in zip(images, predictions)
        ]

    def draw(self, box_thickness: int = 2, show_confidence: bool = True, color_mapping: Optional[List[Tuple[int]]] = None) -> List[np.ndarray]:
        """Draw the predicted bboxes on each image.

        :param box_thickness:   Thickness of bounding boxes.
        :param show_confidence: Whether to show confidence scores on the image.
        :param color_mapping:   List of tuples representing the colors for each class.
                                Default is None, which generates a default color mapping based on the number of class names.
        :return:                One drawn image per stored result, in the same order as `self.results`.
        """
        drawn_images = []
        for result in self.results:
            drawn_images.append(result.draw(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping))
        return drawn_images

    def show(self, box_thickness: int = 2, show_confidence: bool = True, color_mapping: Optional[List[Tuple[int]]] = None) -> None:
        """Display each image with its predicted bboxes, one result after the other.

        :param box_thickness:   Thickness of bounding boxes.
        :param show_confidence: Whether to show confidence scores on the image.
        :param color_mapping:   List of tuples representing the colors for each class.
                                Default is None, which generates a default color mapping based on the number of class names.
        """
        for result in self.results:
            result.show(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping)

          
@@ -3,6 +3,7 @@ from typing import Union
 
                             from torch import nn
                
 
                             from super_gradients.training.utils.utils import HpmStruct
                
 
                            +from super_gradients.training.models.results import Result
                
 
                             class SgModule(nn.Module):
                
@@ -62,3 +63,10 @@ class SgModule(nn.Module):
 
                                     """
                
 
                                     raise NotImplementedError
                
 
                            +
                
 
                            +    def predict(self, images, *args, **kwargs) -> Result:
                
 
                            +        raise NotImplementedError(f"`predict` is not implemented for {self.__class__.__name__}.")
                
 
                            +
                
 
                            +    def set_dataset_processing_params(self, *args, **kwargs) -> None:
                
 
                            +        """Set the processing parameters for the dataset."""
                
 
                            +        pass
                
 
            
          
 
            from abc import ABC, abstractmethod
from typing import List, Optional, Tuple, Union
from contextlib import contextmanager

import numpy as np
import torch

from super_gradients.training.utils.load_image import load_images, ImageType
from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback
from super_gradients.training.models.sg_module import SgModule
from super_gradients.training.models.results import Results, DetectionResults
from super_gradients.training.models.predictions import Prediction, DetectionPrediction
from super_gradients.training.transforms.processing import Processing, ComposeProcessing


@contextmanager
def eval_mode(model: SgModule) -> None:
    """Set a model in evaluation mode and deactivate gradient computation, undo at the end.

    The model's original training flag is restored when the context exits, including when the
    wrapped code raises an exception.

    :param model: The model to set in evaluation mode.
    """
    _starting_mode = model.training
    model.eval()
    try:
        with torch.no_grad():
            yield
    finally:
        # Without the `finally`, an exception inside the `with` body would leave the model stuck in eval mode.
        model.train(mode=_starting_mode)


class Pipeline(ABC):
    """Abstract base class for a task-specific prediction pipeline.
    A pipeline chains four steps: loading images, preprocessing, prediction, and postprocessing.

    :param model:           The model used for making predictions.
    :param image_processor: A single image processor or a list of image processors for preprocessing and postprocessing the images.
                            A list is wrapped into a single `ComposeProcessing`.
    :param device:          The device on which the model will be run. Defaults to "cpu". Use "cuda" for GPU support.
    """

    def __init__(self, model: SgModule, image_processor: Union[Processing, List[Processing]], device: Optional[str] = "cpu"):
        super().__init__()
        self.model = model.to(device)
        self.device = device
        self.image_processor = ComposeProcessing(image_processor) if isinstance(image_processor, list) else image_processor

    @abstractmethod
    def __call__(self, images: Union[ImageType, List[ImageType]]) -> Union[Results, Tuple[List[np.ndarray], List[Prediction]]]:
        """Apply the pipeline on images and return the result.

        This base implementation returns the raw `(images, predictions)` pair produced by `_run`;
        subclasses must override it and may call it through `super()`.

        :param images:  Single image or a list of images of supported types.
        :return:        Results object containing the results of the prediction and the image.
        """
        return self._run(images=images)

    def _run(self, images: Union[ImageType, List[ImageType]]) -> Tuple[List[np.ndarray], List[Prediction]]:
        """Run the pipeline and return (image, predictions). The pipeline is made of 4 steps:
        1. Load images - Loading the images into a list of numpy arrays.
        2. Preprocess - Encode the image in the shape/format expected by the model
        3. Predict - Run the model on the preprocessed image
        4. Postprocess - Decode the output of the model so that the predictions are in the shape/format of original image.

        :param images:  Single image or a list of images of supported types.
        :return:
            - List of numpy arrays representing images.
            - List of model predictions.
        """
        # The model might have been moved to another device after init, so move it back before running.
        self.model = self.model.to(self.device)

        images = load_images(images)

        # Preprocess: each image is copied first so the loaded image is returned to the caller untouched.
        preprocessing_outputs = [self.image_processor.preprocess_image(image=image.copy()) for image in images]
        preprocessed_images = [preprocessed for preprocessed, _ in preprocessing_outputs]
        processing_metadatas = [metadata for _, metadata in preprocessing_outputs]

        # Predict: all preprocessed images are stacked into a single batch tensor.
        with eval_mode(self.model):
            torch_inputs = torch.Tensor(np.array(preprocessed_images)).to(self.device)
            model_output = self.model(torch_inputs)
            predictions = self._decode_model_output(model_output, model_input=torch_inputs)

        # Postprocess: each prediction is decoded with the metadata recorded while preprocessing its image.
        postprocessed_predictions = [
            self.image_processor.postprocess_predictions(predictions=prediction, metadata=metadata)
            for prediction, metadata in zip(predictions, processing_metadatas)
        ]

        return images, postprocessed_predictions

    @abstractmethod
    def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[Prediction]:
        """Decode the model outputs, move each prediction to numpy and store it in a Prediction object.

        :param model_output:    Direct output of the model, without any post-processing.
        :param model_input:     Model input (i.e. images after preprocessing). `_run` passes the batch tensor fed to the model.
        :return:                Model predictions, without any post-processing.
        """


class DetectionPipeline(Pipeline):
    """Pipeline specifically designed for object detection tasks.
    The pipeline includes loading images, preprocessing, prediction, and postprocessing.

    :param model:                       The object detection model (instance of SgModule) used for making predictions.
    :param class_names:                 List of class names corresponding to the model's output classes.
    :param post_prediction_callback:    Callback function to process raw predictions from the model.
    :param image_processor:             Single image processor or a list of image processors for preprocessing and postprocessing the images.
    :param device:                      The device on which the model will be run. Defaults to "cpu". Use "cuda" for GPU support.
    """

    def __init__(
        self,
        model: SgModule,
        class_names: List[str],
        post_prediction_callback: DetectionPostPredictionCallback,
        device: Optional[str] = "cpu",
        image_processor: Optional[Processing] = None,
    ):
        super().__init__(model=model, device=device, image_processor=image_processor)
        self.post_prediction_callback = post_prediction_callback
        self.class_names = class_names

    def __call__(self, images: Union[List[ImageType], ImageType]) -> DetectionResults:
        """Apply the pipeline on images and return the detection result.

        :param images:  Single image or a list of images of supported types.
        :return:        Results object containing the results of the prediction and the image.
        """
        images, predictions = super().__call__(images=images)
        return DetectionResults(images=images, predictions=predictions, class_names=self.class_names)

    def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[DetectionPrediction]:
        """Decode the model output, by applying post prediction callback. This includes NMS.

        :param model_output:    Direct output of the model, without any post-processing.
        :param model_input:     Model input (i.e. images after preprocessing).
        :return:                Predicted Bboxes, one `DetectionPrediction` per input image.
        """
        post_nms_predictions = self.post_prediction_callback(model_output, device=self.device)

        predictions = []
        for prediction, image in zip(post_nms_predictions, model_input):
            # The callback may yield None for an image; substitute an empty (0, 6) tensor so the
            # slicing below still works. The original expression computed this fallback but never assigned it.
            if prediction is None:
                prediction = torch.zeros((0, 6), dtype=torch.float32)
            prediction = prediction.detach().cpu().numpy()
            # Columns 0-3 are read as the bbox, column 4 as the confidence and column 5 as the label.
            predictions.append(
                DetectionPrediction(
                    bboxes=prediction[:, :4],
                    confidence=prediction[:, 4],
                    labels=prediction[:, 5],
                    bbox_format="xyxy",
                    image_shape=image.shape,
                )
            )

        return predictions

          
@@ -1,4 +1,4 @@
 
                            -from typing import Tuple, List, Union
                
 
                            +from typing import Tuple, List, Union, Optional
                
 
                             from abc import ABC, abstractmethod
                
 
                             from dataclasses import dataclass
                
@@ -202,3 +202,43 @@ class DetectionLongestMaxSizeRescale(_LongestMaxSizeRescale):
 
                                 def postprocess_predictions(self, predictions: DetectionPrediction, metadata: RescaleMetadata) -> DetectionPrediction:
                
 
                                     predictions.bboxes_xyxy = _rescale_bboxes(targets=predictions.bboxes_xyxy, scale_factors=(1 / metadata.scale_factor_h, 1 / metadata.scale_factor_w))
                
 
                                     return predictions
                
 
                            +
                
 
                            +
                
 
                            +def get_pretrained_processing_params(model_name: str, pretrained_weights: str) -> Tuple[Optional[List[str]], Optional[Processing]]:
                
 
                            +    """Get the processing parameters for a pretrained model."""
                
 
                            +    if "yolox" in model_name and pretrained_weights == "coco":
                
 
                            +        return default_yolox_coco_processing_params()
                
 
                            +    elif "ppyoloe" in model_name and pretrained_weights == "coco":
                
 
                            +        return default_ppyoloe_coco_processing_params()
                
 
                            +    else:
                
 
                            +        return None, None
                
 
                            +
                
 
                            +
                
 
                            +def default_yolox_coco_processing_params() -> Tuple[List[str], Processing]:
                
 
                            +    """Processing parameters commonly used for training YoloX on COCO dataset."""
                
 
                            +    from super_gradients.training.datasets.datasets_conf import COCO_DETECTION_CLASSES_LIST
                
 
                            +
                
 
                            +    image_processor = ComposeProcessing(
                
 
                            +        [
                
 
                            +            DetectionLongestMaxSizeRescale((640, 640)),
                
 
                            +            DetectionBottomRightPadding((640, 640), 114),
                
 
                            +            ImagePermute((2, 0, 1)),
                
 
                            +        ]
                
 
                            +    )
                
 
                            +    class_names = COCO_DETECTION_CLASSES_LIST
                
 
                            +    return class_names, image_processor
                
 
                            +
                
 
                            +
                
 
                            +def default_ppyoloe_coco_processing_params() -> Tuple[List[str], Processing]:
                
 
                            +    """Processing parameters commonly used for training PPYoloE on COCO dataset."""
                
 
                            +    from super_gradients.training.datasets.datasets_conf import COCO_DETECTION_CLASSES_LIST
                
 
                            +
                
 
                            +    image_processor = ComposeProcessing(
                
 
                            +        [
                
 
                            +            DetectionRescale(output_shape=(640, 640)),
                
 
                            +            NormalizeImage(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
                
 
                            +            ImagePermute(permutation=(2, 0, 1)),
                
 
                            +        ]
                
 
                            +    )
                
 
                            +    class_names = COCO_DETECTION_CLASSES_LIST
                
 
                            +    return class_names, image_processor
                
 
            from typing import Union, List
import PIL

import numpy as np
import torch
import requests
from urllib.parse import urlparse

# Every form in which a single image may be passed to the loaders below:
# a string, a numpy array, a torch tensor, or a PIL image object.
ImageType = Union[str, np.ndarray, torch.Tensor, PIL.Image.Image]


def load_images(images: Union[List[ImageType], ImageType]) -> List[np.ndarray]:
    """Load one image, or a list of images, and always return a list of numpy arrays.

    A non-list argument is treated as a single image and yields a one-element list.

    Supported image types include:
        - numpy.ndarray:    A numpy array representing the image
        - torch.Tensor:     A PyTorch tensor representing the image
        - PIL.Image.Image:  A PIL Image object
        - str:              A string representing either a local file path or a URL to an image

    :param images:  Single image or a list of images of supported types.
    :return:        List of images as numpy arrays. If loaded from string, the image will be returned as RGB.
    """
    # Normalize the argument to a list first, then load every entry the same way.
    batch = images if isinstance(images, list) else [images]
    return [load_image(image=item) for item in batch]


def load_image(image: ImageType) -> np.ndarray:
    """Load a single image and return it as a numpy array.

    Supported image types include:
        - numpy.ndarray:    A numpy array representing the image (returned as-is, no copy)
        - torch.Tensor:     A PyTorch tensor representing the image
        - PIL.Image.Image:  A PIL Image object
        - str:              A string representing either a local file path or a URL to an image

    :param image: Single image of supported types.
    :return:      Image as a numpy array. If loaded from a string or a PIL image, the image will be returned as RGB.
    :raises ValueError: If the type of `image` is not one of the supported types.
    """
    if isinstance(image, np.ndarray):
        return image
    elif isinstance(image, torch.Tensor):
        # Plain `.numpy()` raises for tensors that require grad or live on a non-CPU device;
        # detaching and moving to CPU first makes the conversion work for those as well.
        return image.detach().cpu().numpy()
    elif isinstance(image, PIL.Image.Image):
        return load_np_image_from_pil(image)
    elif isinstance(image, str):
        image = load_pil_image_from_str(image_str=image)
        return load_np_image_from_pil(image)
    else:
        raise ValueError(f"Unsupported image type: {type(image)}")


def load_np_image_from_pil(image: PIL.Image.Image) -> np.ndarray:
    """Return the given PIL image as a numpy array, after converting it to RGB mode."""
    rgb_image = image.convert("RGB")
    return np.asarray(rgb_image)


def load_pil_image_from_str(image_str: str) -> PIL.Image.Image:
    """Load an image based on a string (local file path or URL).

    :param image_str:   Either a URL (as decided by `is_url`) or a path to a local image file.
    :return:            The opened PIL image.
    :raises requests.HTTPError: If the URL answers with an error status (via `raise_for_status`).
    """
    from io import BytesIO

    if is_url(image_str):
        response = requests.get(image_str)
        response.raise_for_status()
        # Hand PIL the fully downloaded body instead of the raw socket stream: the body is
        # complete in memory, so later decoding by PIL does not depend on the HTTP connection
        # staying open, and no streamed response is left unclosed.
        return PIL.Image.open(BytesIO(response.content))
    else:
        return PIL.Image.open(image_str)


def is_url(url: str) -> bool:
    """Tell whether the given string parses as a URL with a scheme, a network location and a path.

    Any exception raised while parsing is swallowed and reported as "not a URL".
    """
    try:
        parsed = urlparse(url)
        # All three components must be non-empty for the string to count as a URL.
        return bool(parsed.scheme and parsed.netloc and parsed.path)
    except Exception:
        return False