Source code for dgenerate.imageprocessors.imageops

# Copyright (c) 2023, Teriks
#
# dgenerate is distributed under the following BSD 3-Clause License
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re

import PIL.Image
import PIL.ImageOps

import dgenerate.image as _image
import dgenerate.imageprocessors.imageprocessor as _imageprocessor
import dgenerate.textprocessing as _textprocessing
import dgenerate.types as _types

# Assigned explicitly instead of being written as a leading string literal: the imports
# above come first, so a bare string at this position would not become the module docstring.
__doc__ = """This module provides some basic image processing capabilities using PIL (Pillow)."""



[docs]
class MirrorFlipProcessor(_imageprocessor.ImageProcessor):
    """
    Implements the "mirror" and "flip" PIL.ImageOps operations
    as an image imageprocessor

    The "pre-resize" argument determines if the processing occurs before or after dgenerate resizes the image.
    This defaults to False, meaning the image is processed after dgenerate is done resizing it.
    """

    NAMES = ['mirror', 'flip']

    # the device related arguments inherited
    # from the base class are hidden
    HIDE_ARGS = ['device', 'model-offload']

    @staticmethod
    def help(loaded_by_name):
        """
        Look up the help text matching the name this processor was loaded by.

        :param loaded_by_name: name used to load the processor
        :return: help text for "mirror" or "flip", ``None`` for any other name
        """
        descriptions = {
            'mirror': 'Mirror the input image horizontally.',
            'flip': 'Flip the input image vertically.',
        }
        return descriptions.get(loaded_by_name)

    def __init__(self, pre_resize: bool = False, **kwargs):
        """
        :param pre_resize: ``True`` to process the image before dgenerate resizes it,
            ``False`` (the default) to process it afterwards
        :param kwargs: forwarded to base class
        """
        super().__init__(**kwargs)

        self._pre_resize = pre_resize

        # the operation is only bound for the two names handled here,
        # any other name leaves ``_func`` unassigned
        operations = {
            'flip': PIL.ImageOps.flip,
            'mirror': PIL.ImageOps.mirror,
        }
        invoked_as = self.loaded_by_name
        if invoked_as in operations:
            self._func = operations[invoked_as]

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Apply the mirror or flip operation, whichever matches the name used to
        invoke this imageprocessor, ahead of the resize step.

        Only acts when the ``pre_resize`` constructor argument was ``True``,
        otherwise the image is handed back as is.

        :param image: image to process
        :param resize_resolution: informational only, not used by this imageprocessor
        :return: the mirrored or flipped image, or the unmodified input image
        """
        if not self._pre_resize:
            return image
        return self._func(image)

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Apply the mirror or flip operation, whichever matches the name used to
        invoke this imageprocessor, after the resize step.

        Only acts when the ``pre_resize`` constructor argument was ``False``,
        otherwise the image is handed back as is.

        :param image: image to process
        :return: the mirrored or flipped image, or the unmodified input image
        """
        if self._pre_resize:
            return image
        return self._func(image)

    def to(self, device) -> "MirrorFlipProcessor":
        """
        No-op for this processor, the ``device`` argument is ignored.

        :param device: the device
        :return: this processor
        """
        return self





[docs]
class SimpleColorProcessor(_imageprocessor.ImageProcessor):
    """
    Implements the "grayscale" and "invert" PIL.ImageOps operations
    as an image imageprocessor.

    The "pre-resize" argument determines if the processing occurs before or after dgenerate resizes the image.
    This defaults to False, meaning the image is processed after dgenerate is done resizing it.
    """

    NAMES = ['grayscale', 'invert']

    # the device related arguments inherited
    # from the base class are hidden
    HIDE_ARGS = ['device', 'model-offload']

    @staticmethod
    def help(loaded_by_name):
        """
        Look up the help text matching the name this processor was loaded by.

        :param loaded_by_name: name used to load the processor
        :return: help text for "grayscale" or "invert", ``None`` for any other name
        """
        descriptions = {
            'grayscale': 'Convert the input image to grayscale.',
            'invert': 'Invert the colors of the input image.',
        }
        return descriptions.get(loaded_by_name)

    def __init__(self, pre_resize: bool = False, **kwargs):
        """
        :param pre_resize: ``True`` to process the image before dgenerate resizes it,
            ``False`` (the default) to process it afterwards
        :param kwargs: forwarded to base class
        """
        super().__init__(**kwargs)

        self._pre_resize = pre_resize

        # the operation is only bound for the two names handled here,
        # any other name leaves ``_func`` unassigned
        operations = {
            'grayscale': PIL.ImageOps.grayscale,
            'invert': PIL.ImageOps.invert,
        }
        invoked_as = self.loaded_by_name
        if invoked_as in operations:
            self._func = operations[invoked_as]

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Apply the grayscale or invert operation, whichever matches the name used
        to invoke this imageprocessor, ahead of the resize step.

        Only acts when the ``pre_resize`` constructor argument was ``True``,
        otherwise the image is handed back as is.

        :param image: image to process
        :param resize_resolution: informational only, not used by this imageprocessor
        :return: the inverted or grayscale image, or the unmodified input image
        """
        if not self._pre_resize:
            return image
        return self._func(image)

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Apply the grayscale or invert operation, whichever matches the name used
        to invoke this imageprocessor, after the resize step.

        Only acts when the ``pre_resize`` constructor argument was ``False``,
        otherwise the image is handed back as is.

        :param image: image to process
        :return: the inverted or grayscale image, or the unmodified input image
        """
        if self._pre_resize:
            return image
        return self._func(image)

    def to(self, device) -> "SimpleColorProcessor":
        """
        No-op for this processor, the ``device`` argument is ignored.

        :param device: the device
        :return: this processor
        """
        return self





[docs]
class PosterizeProcessor(_imageprocessor.ImageProcessor):
    """
    Posterize the input image with PIL.ImageOps.posterize.

    Accepts the argument 'bits', an integer value from 1 to 8.

    The "pre-resize" argument determines if the processing occurs before or after dgenerate resizes the image.
    This defaults to False, meaning the image is processed after dgenerate is done resizing it.
    """

    NAMES = ['posterize']

    # the device related arguments inherited
    # from the base class are hidden
    HIDE_ARGS = ['device', 'model-offload']

    def __init__(self, bits: int, pre_resize: bool = False, **kwargs):
        """
        :param bits: required argument, integer value from 1 to 8
        :param pre_resize: ``True`` to process the image before dgenerate resizes it,
            ``False`` (the default) to process it afterwards
        :param kwargs: forwarded to base class
        """
        super().__init__(**kwargs)

        # reject out of range values before anything is stored
        if bits > 8 or bits < 1:
            raise self.argument_error(
                f'Argument "bits" must be an integer value from 1 to 8, received {bits}.')

        self._pre_resize = pre_resize
        self._bits = bits

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Posterize the image ahead of the resize step, only when the
        ``pre_resize`` constructor argument was ``True``.

        :param image: image to process
        :param resize_resolution: informational only, not used by this imageprocessor
        :return: the posterized image, or the unmodified input image
        """
        return PIL.ImageOps.posterize(image, self._bits) if self._pre_resize else image

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Posterize the image after the resize step, only when the
        ``pre_resize`` constructor argument was ``False``.

        :param image: image to process
        :return: the posterized image, or the unmodified input image
        """
        return image if self._pre_resize else PIL.ImageOps.posterize(image, self._bits)

    def to(self, device) -> "PosterizeProcessor":
        """
        No-op for this processor, the ``device`` argument is ignored.

        :param device: the device
        :return: this processor
        """
        return self





[docs]
class SolarizeProcessor(_imageprocessor.ImageProcessor):
    """
    Solarize the input image with PIL.ImageOps.solarize.

    Accepts the argument "threshold" which is an integer value from 0 to 255.

    The "pre-resize" argument determines if the processing occurs before or after dgenerate resizes the image.
    This defaults to False, meaning the image is processed after dgenerate is done resizing it.
    """

    NAMES = ['solarize']

    # the device related arguments inherited
    # from the base class are hidden
    HIDE_ARGS = ['device', 'model-offload']

    def __init__(self, threshold: int = 128, pre_resize: bool = False, **kwargs):
        """
        :param threshold: integer value from 0 to 255, default is 128
        :param pre_resize: ``True`` to process the image before dgenerate resizes it,
            ``False`` (the default) to process it afterwards
        :param kwargs: forwarded to base class
        """
        super().__init__(**kwargs)

        # reject out of range values before anything is stored
        if threshold > 255 or threshold < 0:
            raise self.argument_error(
                f'Argument "threshold" must be an integer value from 0 to 255, received {threshold}.')

        self._pre_resize = pre_resize
        self._threshold = threshold

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Solarize the image ahead of the resize step, only when the
        ``pre_resize`` constructor argument was ``True``.

        :param image: image to process
        :param resize_resolution: informational only, not used by this imageprocessor
        :return: the solarized image, or the unmodified input image
        """
        return PIL.ImageOps.solarize(image, self._threshold) if self._pre_resize else image

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Solarize the image after the resize step, only when the
        ``pre_resize`` constructor argument was ``False``.

        :param image: image to process
        :return: the solarized image, or the unmodified input image
        """
        return image if self._pre_resize else PIL.ImageOps.solarize(image, self._threshold)

    def to(self, device) -> "SolarizeProcessor":
        """
        No-op for this processor, the ``device`` argument is ignored.

        :param device: the device
        :return: this processor
        """
        return self





[docs]
class ResizeProcessor(_imageprocessor.ImageProcessor):
    """
    Resize an image using basic resampling algorithms.

    The "size" argument is the new image size.

    The "scale" argument is either a single floating point value to scale both dimensions by,
    or a tuple of two floating point values to scale x and y dimensions separately.
    This is mutually exclusive with "size". When specifying a tuple, you
    may use CSV, for example: "2,1", meaning X*2, Y*1.

    The "align" argument is the new image alignment.

    The "aspect-correct" argument is a boolean argument that determines if the resize is aspect correct.

    The "algo" argument is the resize filtering algorithm, which can be one of:
    "auto", "nearest", "box", "bilinear", "hamming", "bicubic", "lanczos"

    The "pre-resize" argument determines if the processing occurs before or after dgenerate resizes the image.
    This defaults to False, meaning the image is processed after dgenerate is done resizing it.
    """

    NAMES = ['resize']

    # hide inherited arguments
    # that are device related
    HIDE_ARGS = ['device', 'model-offload']

    def __init__(self,
                 size: str | None = None,
                 scale: float | tuple[float, float] | None = None,
                 align: int | None = None,
                 aspect_correct: bool = True,
                 algo: str = 'auto',
                 pre_resize: bool = False,
                 **kwargs):
        """
        :param size: new image size as a string, parsed with
            ``dgenerate.textprocessing.parse_image_size``. Mutually exclusive with ``scale``
        :param scale: a single factor applied to both dimensions, or a tuple of
            two factors ``(x, y)``. Every factor must be greater than 0
        :param align: image alignment passed on to the resize, must not be less than 1
        :param aspect_correct: should the resize be aspect correct?
        :param algo: resampling algorithm name, one of: "auto", "nearest", "box",
            "bilinear", "hamming", "bicubic", "lanczos"
        :param pre_resize: ``True`` to resize before dgenerate resizes the image,
            ``False`` (the default) to resize afterwards
        :param kwargs: forwarded to base class

        :raises: the error produced by ``argument_error`` when no resizing argument
            is given, when ``size`` and ``scale`` are both given, or when ``algo``,
            ``scale``, ``size`` or ``align`` hold an invalid value
        """
        super().__init__(**kwargs)

        if (size is None and scale is None) and align is None:
            raise self.argument_error('no arguments provided that result in resizing.')

        if size is not None and scale is not None:
            raise self.argument_error('arguments "size" and "scale" are mutually exclusive.')

        if algo not in {"auto", "nearest", "box", "bilinear", "hamming", "bicubic", "lanczos"}:
            raise self.argument_error(
                'algo must be one of: "auto", "nearest", "box", "bilinear", "hamming", "bicubic", "lanczos"')

        if scale is not None:
            # Validate up front: a tuple of the wrong length, or a factor that
            # yields an empty / negative size, would otherwise only fail later
            # inside _process with an error unrelated to the argument.
            if isinstance(scale, tuple):
                if len(scale) != 2:
                    raise self.argument_error(
                        'scale must be a single value, or exactly two values (x and y).')
                factors = scale
            else:
                factors = (scale,)

            if any(factor <= 0 for factor in factors):
                raise self.argument_error('scale values must be greater than 0.')

        if size is not None:
            try:
                self._size = _textprocessing.parse_image_size(size)
            except ValueError as e:
                raise self.argument_error(str(e).strip()) from e
        else:
            self._size = None

        if align is not None and align < 1:
            raise self.argument_error('align must not be less than 1.')

        self._align = align
        self._aspect_correct = aspect_correct
        self._algo = algo
        self._pre_resize = pre_resize
        self._scale = scale

    def _process(self, image: PIL.Image.Image):
        """
        Resize ``image`` according to the constructor arguments.

        :param image: image to resize
        :return: the result of ``dgenerate.image.resize_image``
        """
        if self._algo == 'auto':
            # no explicit filter, the choice is left to resize_image
            algo = None
        else:
            # algorithm names map onto the upper case PIL.Image.Resampling members
            algo = getattr(PIL.Image.Resampling, self._algo.upper())

        size = self._size
        if self._scale is not None:
            # "scale" is relative to the dimensions of the image being processed
            w, h = image.size
            if isinstance(self._scale, tuple):
                # separate factors for x and y
                scale_x, scale_y = self._scale
            else:
                # one factor for both dimensions
                scale_x = scale_y = self._scale
            size = (int(w * scale_x), int(h * scale_y))

        return _image.resize_image(img=image,
                                   size=size,
                                   aspect_correct=self._aspect_correct,
                                   align=self._align,
                                   algo=algo)

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Resize operation is performed by this method if ``pre_resize`` constructor argument was ``True``.

        :param image: image to process
        :param resize_resolution: purely informational, is unused by this imageprocessor
        :return: the resized image, or the same image when ``pre_resize`` was ``False``
        """
        if self._pre_resize:
            return self._process(image)
        return image

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Resize operation is performed by this method if ``pre_resize`` constructor argument was ``False``.

        :param image: image to process
        :return: the resized image, or the same image when ``pre_resize`` was ``True``
        """
        if not self._pre_resize:
            return self._process(image)
        return image

    def to(self, device) -> "ResizeProcessor":
        """
        Does nothing for this processor.

        :param device: the device
        :return: this processor
        """
        return self





[docs]
class LetterboxProcessor(_imageprocessor.ImageProcessor):
    """
    Letterbox an image.

    The "box-size" argument is the size of the outer letterbox.

    In non-padding mode "box-size" may be specified as the absolute size of the final image "WIDTHxHEIGHT",
    or with a single integer denoting both width and height.

    The "box-is-padding" argument can be used to indicate that "box-size" should be interpreted as padding.

    When in padding mode, "box-size" can be specified as a width / height padding around the original image
    i.e. "WIDTHxHEIGHT", (a single integer can also suffice). Or as a four part padding value: "LEFTxTOPxRIGHTxBOTTOM"

    The "box-color" argument specifies the color to use for the letter box background, the default is black.
    This should be specified as a HEX color code. e.g. #FFFFFF or #FFF

    The "inner-size" argument specifies the size of the inner image, in the form: "WIDTHxHEIGHT"

    The "aspect-correct" argument can be used to determine if the aspect ratio
    of the inner image is maintained or not.

    The "pre-resize" argument determines if the processing occurs before or after dgenerate resizes the image.
    This defaults to False, meaning the image is processed after dgenerate is done resizing it.
    """

    NAMES = ['letterbox']

    # hide inherited arguments
    # that are device related
    HIDE_ARGS = ['device', 'model-offload']

    @staticmethod
    def _match_hex_color(color):
        """
        Check whether a string is a 3 or 6 digit HEX color code with a leading "#".

        :param color: the string to test
        :return: ``True`` for values such as ``#FFF`` or ``#FFFFFF``, otherwise ``False``
        """
        # fullmatch is used instead of match with a "$" anchor, since "$" also
        # matches right before a trailing newline and would accept such a value
        return re.fullmatch(r'#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})', color) is not None

    def __init__(self,
                 box_size: str,
                 box_is_padding: bool = False,
                 box_color: str | None = None,
                 inner_size: str | None = None,
                 aspect_correct: bool = True,
                 pre_resize: bool = False,
                 **kwargs):
        """
        :param box_size: Size of the outer letterbox, or padding
        :param box_is_padding: The ``box_size`` argument should be interpreted as padding?
        :param box_color: What color to use for the letter box background, the default is black.
            This should be specified as a HEX color code.
        :param inner_size: The size of the inner image
        :param aspect_correct: Should the size of the inner image be aspect correct?
        :param pre_resize: ``True`` to letterbox before dgenerate resizes the image,
            ``False`` (the default) to letterbox afterwards
        :param kwargs: forwarded to base class

        :raises: the error produced by ``argument_error`` when ``box_color`` is not a
            HEX color code, or when ``box_size`` / ``inner_size`` cannot be parsed or
            have an unsupported number of dimensions
        """
        super().__init__(**kwargs)

        if box_color is not None and not self._match_hex_color(box_color):
            raise self.argument_error('box-color must be a HEX color code, e.g. #FFFFFF or #FFF')

        # Only the parse call is guarded, so the dimension count errors raised
        # below can never be caught and re-wrapped by the ValueError handler.
        try:
            dim = _textprocessing.parse_dimensions(box_size)
        except ValueError as e:
            raise self.argument_error(f'Could not parse the "box_size" argument as a dimension: {e}') from e

        if box_is_padding:
            if len(dim) not in {1, 2, 4}:
                raise self.argument_error('Argument "box_size" must be a 1, 2, or 4 dimensional padding value.')
        elif len(dim) not in {1, 2}:
            raise self.argument_error('Argument "box_size" must be a 1 or 2 dimensional image size value.')

        # a single value is stored as a scalar, anything else as parsed
        self._box_size = dim[0] if len(dim) == 1 else dim

        self._box_is_padding = box_is_padding
        self._box_color = box_color

        # an empty / missing inner size means no explicit inner size
        self._inner_size = None
        if inner_size:
            try:
                self._inner_size = _textprocessing.parse_image_size(inner_size)
            except ValueError as e:
                raise self.argument_error(
                    f'Could not parse the "inner_size" argument as an image dimension: {e}') from e

        self._aspect_correct = aspect_correct
        self._pre_resize = pre_resize

    def _process(self, image: PIL.Image.Image):
        """
        Letterbox ``image`` according to the constructor arguments.

        :param image: image to letterbox
        :return: the result of ``dgenerate.image.letterbox_image``
        """
        return _image.letterbox_image(image,
                                      box_size=self._box_size,
                                      box_is_padding=self._box_is_padding,
                                      box_color=self._box_color,
                                      inner_size=self._inner_size,
                                      aspect_correct=self._aspect_correct)

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Letterbox operation is performed by this method if ``pre_resize`` constructor argument was ``True``.

        :param image: image to process
        :param resize_resolution: purely informational, is unused by this imageprocessor
        :return: the letterboxed image, or the same image when ``pre_resize`` was ``False``
        """
        if self._pre_resize:
            return self._process(image)
        return image

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Letterbox operation is performed by this method if ``pre_resize`` constructor argument was ``False``.

        :param image: image to process
        :return: the letterboxed image, or the same image when ``pre_resize`` was ``True``
        """
        if not self._pre_resize:
            return self._process(image)
        return image

    def to(self, device) -> "LetterboxProcessor":
        """
        Does nothing for this processor.

        :param device: the device
        :return: this processor
        """
        return self





[docs]
class CropProcessor(_imageprocessor.ImageProcessor):
    """
    Crop the input image to a specified box region.

    The "box" argument specifies the crop region in the format "LEFTxTOPxRIGHTxBOTTOM",
    where each value represents pixel coordinates. For example: "100x50x300x400" will crop
    the image with top left: (x=100, y=50), bottom right: (x=300, y=400)

    The "pre-resize" argument determines if the cropping occurs before or
    after dgenerate resizes the image. This defaults to False, meaning the image is
    cropped after dgenerate is done resizing it.
    """

    NAMES = ['crop']

    # hide inherited arguments
    # that are device related
    HIDE_ARGS = ['device', 'model-offload']

    def __init__(self, box: str, pre_resize: bool = False, **kwargs):
        """
        :param box: Crop region in format "LEFTxTOPxRIGHTxBOTTOM"
        :param pre_resize: Whether to crop before or after dgenerate's resize operation
        :param kwargs: forwarded to base class

        :raises: the error produced by ``argument_error`` when ``box`` cannot be parsed,
            does not have four parts, has a negative left / top coordinate, or
            describes an empty region
        """
        super().__init__(**kwargs)

        # Parse the box string in format LEFTxTOPxRIGHTxBOTTOM
        try:
            parts = _textprocessing.parse_dimensions(box)
        except ValueError as e:
            # f-string so the offending value is actually shown to the user,
            # chained so the parser's own message is not lost
            raise self.argument_error(
                f'Error parsing "box" argument: {box}. Expected format (integers): LEFTxTOPxRIGHTxBOTTOM'
            ) from e

        if len(parts) != 4:
            raise self.argument_error(
                'Argument "box" must be in format (integers): LEFTxTOPxRIGHTxBOTTOM')

        self._box = parts

        left, top, right, bottom = self._box
        if left < 0 or top < 0:
            raise self.argument_error('Argument "box": Left and top coordinates must be non-negative')
        # the region must have a positive width and height
        if right <= left:
            raise self.argument_error('Argument "box": Right coordinate must be greater than left')
        if bottom <= top:
            raise self.argument_error('Argument "box": Bottom coordinate must be greater than top')

        self._pre_resize = pre_resize

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Crop the image if pre_resize is True.

        :param image: image to process
        :param resize_resolution: purely informational, is unused by this processor
        :return: the cropped image if pre_resize is True, otherwise the original image
        """
        if self._pre_resize:
            return image.crop(self._box)
        return image

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Crop the image if pre_resize is False.

        :param image: image to process
        :return: the cropped image if pre_resize is False, otherwise the original image
        """
        if not self._pre_resize:
            return image.crop(self._box)
        return image

    def to(self, device) -> "CropProcessor":
        """
        Does nothing for this processor.

        :param device: the device
        :return: this processor
        """
        return self




# The export list is computed by the project helper ``dgenerate.types.module_all``
# instead of being listed by hand (presumably it collects the public names
# defined in this module; confirm against that helper).
__all__ = _types.module_all()