# Source code for dgenerate.image

# Copyright (c) 2023, Teriks
#
# dgenerate is distributed under the following BSD 3-Clause License
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import math
import typing

import PIL.Image
import PIL.ImageDraw
import PIL.ImageFilter
import PIL.ImageChops
import cv2
import numpy
import piexif
import piexif.helper
import dgenerate.textprocessing as _textprocessing
import dgenerate.types as _types

# Assigned explicitly: the import statements above come first, so a bare
# string literal at this position would not become the module docstring.
__doc__ = """
Image operations commonly used by dgenerate.
"""



[docs]
def is_image(obj) -> bool:
    """
    Test whether an object is a PIL image instance.

    :param obj: any object
    :return: ``True`` when ``obj`` is an instance of ``PIL.Image.Image``,
        ``False`` otherwise
    """
    image_type = PIL.Image.Image
    return isinstance(obj, image_type)




[docs]
def resize_image_calc(old_size: _types.Size,
                      new_size: _types.OptionalSize,
                      aspect_correct: bool = False,
                      align: int | None = None):
    """
    Work out the dimensions an image should have after a requested resize.

    Only the size arithmetic is performed here, no image data is involved.

    :param old_size: current ``(width, height)`` of the image
    :param new_size: requested ``(width, height)``, or ``None`` to keep
        ``old_size`` (still subject to ``align``)

    :param aspect_correct: when ``True`` only the requested width is honored
        and the height is derived from it using the aspect ratio of ``old_size``

    :param align: if not ``None``, returned dimensions are rounded down
        to a multiple of this value

    :raise ValueError: if ``align`` is less than 1

    :return: the calculated ``(width, height)``
    """
    aligning = align is not None

    if aligning and align < 1:
        raise ValueError('align value may not be less than 1.')

    if new_size is None:
        # no size was requested, keep the old one (aligned when asked for)
        return align_by(old_size, align=align) if aligning else old_size

    if aligning:
        new_size = align_by(new_size, align=align)

    if old_size == new_size:
        return old_size

    if not aspect_correct:
        return new_size

    target_width = new_size[0]
    ratio = target_width / float(old_size[0])
    target_height = int(float(old_size[1]) * float(ratio))

    if aligning:
        # scaling the height may have broken its alignment, round it down again
        target_height -= target_height % align

    return target_width, target_height




[docs]
def is_power_of_two(iterable: typing.Iterable[int]) -> bool:
    """
    Report whether every element of ``iterable`` is a power of 2.

    Zero and negative values are never considered a power of 2.
    An empty iterable yields ``True``.

    :param iterable: integers to test

    :return: ``True`` if all elements are a power of 2
    """
    # a positive power of two has exactly one bit set, so clearing the
    # lowest set bit with ``v & (v - 1)`` must leave nothing behind
    return all(
        value > 0 and (value & (value - 1)) == 0
        for value in iterable
    )




[docs]
def nearest_power_of_two(iterable: typing.Iterable[int]) -> tuple:
    """
    Round every element to its nearest power of two.

    Elements that are zero or negative become ``0``. An element that sits
    exactly halfway between two powers of two is rounded up.

    :param iterable: numbers to round

    :return: tuple with one rounded value per input element
    """

    def _round_single(value):
        if value <= 0:
            return 0

        exponent = int(math.log2(value))
        below = 2 ** exponent
        above = 2 ** (exponent + 1)

        # strict comparison: a tie goes to the higher power of two
        return below if abs(value - below) < abs(value - above) else above

    return tuple(_round_single(value) for value in iterable)




[docs]
def is_aligned(iterable: typing.Iterable[int], align: int) -> bool:
    """
    Report whether every element is a multiple of ``align``.

    :param iterable: values to test
    :param align: alignment value, ``None`` means no alignment is
        required and the result is always ``True``

    :raise ValueError: if ``align`` is less than 1

    :return: ``True`` if all elements are aligned
    """

    if align is None:
        return True

    if align < 1:
        raise ValueError('align value may not be less than 1.')

    for value in iterable:
        if value % align != 0:
            return False

    return True




[docs]
def align_by(iterable: typing.Iterable[int], align: int) -> tuple:
    """
    Round every element down to a multiple of ``align``.

    :param iterable: values to align
    :param align: alignment value, ``None`` is treated as ``1``
        (the values are returned unchanged)

    :raise ValueError: if ``align`` is less than 1

    :return: tuple with one aligned value per input element
    """

    step = 1 if align is None else align

    if step < 1:
        raise ValueError('align value may not be less than 1.')

    aligned = []
    for value in iterable:
        # drop the remainder so the value lands on a multiple of step
        aligned.append(value - value % step)

    return tuple(aligned)




[docs]
def copy_img(img: PIL.Image.Image):
    """
    Duplicate a :py:class:`PIL.Image.Image`, carrying over its ``filename``
    attribute when the source image has one.

    :param img: the image to copy
    :return: the copy
    """
    duplicate = img.copy()

    try:
        source_name = img.filename
    except AttributeError:
        # the source has no filename, there is nothing to carry over
        return duplicate

    duplicate.filename = source_name
    return duplicate



[docs]
def normalize_padding_value(padding: str | int | tuple[int, int] | tuple[int, int, int, int]) -> tuple[int, int, int, int]:
    """
    Normalize a padding value.

    This value can be a string, e.g. ``"10"``, or ``"10x10"``, or ``"10x10x10x10"``

    It can also be specified as a python ``int`` or ``tuple``

    Multidimensional padding values are laid out as: ``LEFTxTOPxRIGHTxBOTTOM``, or ``WIDTHxHEIGHT``

    This is the same all across dgenerate.

    :raise ValueError: If the padding value is specified incorrectly.

    :param padding: Padding value
    :return: Normalized padding (4 tuple of int)
    """

    # Parse padding argument
    if isinstance(padding, int):
        padding = (padding, padding, padding, padding)  # left, top, right, bottom
    else:
        if isinstance(padding, str):
            try:
                padding_dims = _textprocessing.parse_dimensions(str(padding))
            except ValueError as e:
                raise ValueError(f'Could not parse the padding value: {padding}') from e
        else:
            padding_dims = padding

        if len(padding_dims) == 1:
            # Uniform padding
            p = padding_dims[0]
            padding = (p, p, p, p)
        elif len(padding_dims) == 2:
            # Width x Height padding
            width_pad, height_pad = padding_dims
            padding = (width_pad, height_pad, width_pad, height_pad)
        elif len(padding_dims) == 4:
            # Left x Top x Right x Bottom padding
            padding = tuple(padding_dims)
        else:
            raise ValueError(
                'Padding value must be 1, 2, or 4 dimensional. '
                'Use format: "10" (uniform), "10x20" (width x height), '
                'or "5x10x5x15" (left x top x right x bottom)')

    return padding



[docs]
def find_mask_bounds(
        img: PIL.Image.Image,
        padding: str | int | tuple[int, int] | tuple[int, int, int, int]) -> tuple[int, int, int, int] | None:
    """
    Locate the bounding box that encloses the white pixels of a mask.

    The mask is examined in ``L`` (grayscale) mode, any pixel with a value
    above 127 counts as white. The box is grown by ``padding`` and clamped
    to the image. The right and bottom bounds are exclusive.

    :raise ValueError: If the padding value is specified incorrectly.

    :param img: The mask image (PIL Image)
    :param padding: Bounding box padding value, see: :py:func:`normalize_padding_value` for accepted values.
    :return: Tuple of (left, top, right, bottom) bounds, or ``None`` if no white pixels found.
    """

    gray = img if img.mode == 'L' else img.convert('L')

    # row and column indices of every pixel that counts as white
    rows, cols = numpy.nonzero(numpy.array(gray) > 127)

    if len(rows) == 0:
        # the mask contains no white pixels
        return None

    pad_left, pad_top, pad_right, pad_bottom = normalize_padding_value(padding)

    return (
        max(0, int(cols.min()) - pad_left),
        max(0, int(rows.min()) - pad_top),
        # +1 turns the inclusive max index into an exclusive bound
        min(gray.width, int(cols.max()) + pad_right + 1),
        min(gray.height, int(rows.max()) + pad_bottom + 1),
    )




[docs]
def best_pil_resampling(old_size: _types.Size, new_size: _types.Size) -> PIL.Image.Resampling:
    """
    Pick a PIL resampling filter suited to a resize operation.

    The decision is based on the smaller of the horizontal and vertical
    scale factors:

    * above 1 (upscale): ``BICUBIC`` below a factor of 3, ``LANCZOS`` otherwise
    * below 1 (downscale): ``LANCZOS``
    * exactly 1: ``NEAREST``

    :param old_size: (tuple) Source image size (width, height).
    :param new_size: (tuple) Destination image size (width, height).
    :return: (PIL.Image.Resampling) the selected resampling method.
    """
    filters = PIL.Image.Resampling

    width_ratio = new_size[0] / old_size[0]
    height_ratio = new_size[1] / old_size[1]
    ratio = min(width_ratio, height_ratio)

    if ratio > 1:
        # upscaling
        if ratio < 3:
            return filters.BICUBIC
        return filters.LANCZOS

    if ratio < 1:
        # downscaling
        return filters.LANCZOS

    # size is unchanged along the limiting axis
    return filters.NEAREST




[docs]
def best_cv2_resampling(old_size: _types.Size, new_size: _types.Size) -> int:
    """
    Pick an OpenCV interpolation flag suited to a resize operation.

    Only the first two entries of each argument are read. The decision is
    based on the smaller of the two per-axis scale factors:

    * above 1 (upscale): ``INTER_CUBIC`` below a factor of 3, ``INTER_LANCZOS4`` otherwise
    * below 1 (downscale): ``INTER_AREA``
    * exactly 1: ``INTER_NEAREST``

    :param old_size: (tuple) Source image shape (height, width, channels).
    :param new_size: (tuple) Destination image shape (height, width).
    :return: (int) the selected OpenCV interpolation method.
    """
    horizontal_ratio = new_size[1] / old_size[1]
    vertical_ratio = new_size[0] / old_size[0]
    ratio = min(horizontal_ratio, vertical_ratio)

    if ratio > 1:
        # upscaling
        if ratio < 3:
            return cv2.INTER_CUBIC
        return cv2.INTER_LANCZOS4

    if ratio < 1:
        # downscaling
        return cv2.INTER_AREA

    # size is unchanged along the limiting axis
    return cv2.INTER_NEAREST




[docs]
def cv2_resize_image(img: numpy.ndarray,
                     size: _types.OptionalSize,
                     aspect_correct: bool = False,
                     align: int | None = None,
                     algo: typing.Optional[int] = None):
    """
    Resize a :py:class:`numpy.ndarray` image and return a copy.

    This function always returns a copy even if the images size did not change.

    The new image dimension will always be forcefully aligned by ``align``,
    specifying ``None`` or ``1`` indicates no alignment.

    :param img: the image to resize, ``img.shape[0]`` is read as the height
        and ``img.shape[1]`` as the width
    :param size: requested new size ``(width, height)`` for the image, may be ``None``.
    :param aspect_correct: preserve aspect ratio?
    :param align: Force alignment by this amount of pixels.
    :param algo: cv2 resampling algorithm, when ``None`` one is selected
        with :py:func:`best_cv2_resampling`
    :raise ValueError: if ``align`` is less than 1
    :return: the resized image
    """
    in_height = img.shape[0]
    in_width = img.shape[1]
    old_size = (in_width, in_height)

    new_size = resize_image_calc(old_size=old_size,
                                 new_size=size,
                                 aspect_correct=aspect_correct,
                                 align=align)

    # ``img.size`` of an ndarray is its total element count (an int), which
    # never equals a (width, height) pair, so compare the dimensions instead
    if old_size == tuple(new_size):
        # probably less costly
        return numpy.copy(img)

    if algo is None:
        # both sizes are passed in (width, height) order; the selection only
        # uses the smaller of the two per-axis ratios, so the order of the
        # axes does not affect the result as long as both arguments agree
        algo = best_cv2_resampling(old_size, new_size)

    return cv2.resize(img, new_size, interpolation=algo)




[docs]
def resize_image(img: PIL.Image.Image,
                 size: _types.OptionalSize,
                 aspect_correct: bool = False,
                 align: int | None = None,
                 algo: typing.Optional[PIL.Image.Resampling] = None):
    """
    Produce a resized copy of a :py:class:`PIL.Image.Image`.

    A copy is returned in every case, including when the calculated size
    equals the current size of the image.

    The resulting dimensions are always forced to a multiple of ``align``,
    passing ``None`` or ``1`` disables alignment.

    The ``filename`` attribute of the source image is carried over
    to the result when it exists.

    :param img: the image to resize
    :param size: requested new size for the image, may be ``None``.
    :param aspect_correct: preserve aspect ratio?
    :param align: Force alignment by this amount of pixels.
    :param algo: Resampling algorithm, when ``None`` one is selected
        with :py:func:`best_pil_resampling`
    :return: the resized image
    """
    target = resize_image_calc(old_size=img.size,
                               new_size=size,
                               aspect_correct=aspect_correct,
                               align=align)

    if target == img.size:
        # nothing to resample, a plain copy is probably less costly
        return copy_img(img)

    resampling = best_pil_resampling(img.size, target) if algo is None else algo

    resized = img.resize(target, resampling)

    if hasattr(img, 'filename'):
        resized.filename = img.filename

    return resized




[docs]
def paste_with_feather(
        background: PIL.Image.Image,
        foreground: PIL.Image.Image,
        location: tuple[int, int] | tuple[int, int, int, int] | list[int],
        feather: int = 30,
        shape: str = 'rectangle'
) -> PIL.Image.Image:
    """
    Composite an image onto a background with feathered (soft) edges.

    Creates smooth, blended transitions between foreground and background images
    by applying Gaussian blur to a mask, eliminating hard edges. The feathering
    effect is achieved by shrinking the mask and then blurring it. Any alpha
    channel the foreground already has is multiplied with the feather mask.

    :param background: The background image to paste onto. It is composited in RGBA mode,
        the passed image itself is not modified.
    :param foreground: The foreground image to paste. It is resized to the paste size
        derived from ``location``.
    :param location: Specifies where to place the image. 2 elements ``(x, y)`` give the
        top-left corner and the foreground is pasted at its own size, 4 elements
        ``(x0, y0, x1, y1)`` give a bounding box that the foreground is resized to fit.
    :param feather: The desired width of the feathered edge in pixels.
    :param shape: The shape of the mask. ``r`` / ``rect`` / ``rectangle`` for rectangular
        mask, ``c`` / ``circle`` / ``ellipse`` for circular. The elliptical shape is drawn
        as a centered circle sized from the smaller paste dimension.
    :returns: The composite image with feathered edges in the mode (channels) of the background image.
    :raises ValueError: If location doesn't contain 2 or 4 elements.
                        If shape is not recognized.
    """

    input_mode = background.mode
    background = background.convert("RGBA")

    # the opaque mask area is pulled inward by half the feather width
    inset = feather // 2
    blur_radius = max(1, feather / 6.0) if feather > 0 else 0

    # width of the border added on every side of the mask and foreground canvas
    margin = math.ceil(feather)

    if len(location) == 2:
        paste_offset = tuple(location)
        paste_size = foreground.size
    elif len(location) == 4:
        x0, y0, x1, y1 = location
        paste_offset = (x0, y0)
        paste_size = (x1 - x0, y1 - y0)
    else:
        raise ValueError("location must be a tuple/list of length 2 or 4")

    mask_size = (paste_size[0] + 2 * margin, paste_size[1] + 2 * margin)
    mask = PIL.Image.new("L", mask_size, 0)
    draw = PIL.ImageDraw.Draw(mask)

    mask_shape = _textprocessing.parse_basic_mask_shape(shape)

    if mask_shape == _textprocessing.BasicMaskShape.ELLIPSE:
        cx = mask_size[0] // 2
        cy = mask_size[1] // 2
        r = min(paste_size) // 2 - inset
        if r < 0:
            r = 0
        bbox = (cx - r, cy - r, cx + r, cy + r)
        draw.ellipse(bbox, fill=255)
    elif mask_shape == _textprocessing.BasicMaskShape.RECTANGLE:
        left = margin + inset
        top = margin + inset
        right = margin + paste_size[0] - inset
        bottom = margin + paste_size[1] - inset

        if left >= right or top >= bottom:
            # the inset consumed the whole paste area
            left = top = right = bottom = 0
        draw.rectangle((left, top, right, bottom), fill=255)
    else:
        raise ValueError(f'Unknown mask shape: {shape}')

    if blur_radius > 0:
        blurred_mask = mask.filter(PIL.ImageFilter.GaussianBlur(radius=blur_radius))
    else:
        blurred_mask = mask

    input_rgba = foreground.convert("RGBA").resize(
        paste_size, PIL.Image.Resampling.LANCZOS
    )

    input_with_margin = PIL.Image.new("RGBA", mask_size, (0, 0, 0, 0))
    input_with_margin.paste(input_rgba, (margin, margin))

    # input_rgba was just converted to RGBA, so it always has an alpha
    # channel; combine it with the feather mask so transparency that was
    # already present in the foreground is kept
    existing_alpha = input_rgba.split()[-1]
    alpha_with_margin = PIL.Image.new("L", mask_size, 0)
    alpha_with_margin.paste(existing_alpha, (margin, margin))

    combined_alpha = PIL.ImageChops.multiply(alpha_with_margin, blurred_mask)
    input_with_margin.putalpha(combined_alpha)

    composite = background.copy()

    # the canvas is larger than the paste area by margin on each side,
    # shift the paste position so the foreground lands on paste_offset
    adjusted_offset = (paste_offset[0] - margin, paste_offset[1] - margin)

    composite.paste(input_with_margin, adjusted_offset, input_with_margin)

    return composite.convert(input_mode)




[docs]
def letterbox_image(img: PIL.Image.Image,
                    box_size: _types.Padding,
                    box_is_padding: bool = False,
                    box_color: str | int | float | tuple[int, int, int] | tuple[float, float, float] | None = None,
                    inner_size: _types.OptionalSize = None,
                    aspect_correct: bool = True):
    """
    Letterbox an image on to a colored background.

    The returned image is always created in ``RGB`` mode. The ``filename``
    attribute of the input image is carried over when it exists.

    :param img: The image to letterbox
    :param box_size: Size of the outer letterbox, or padding values.
        - If ``box_is_padding=False``:
            - (int) both width and height equal to this integer
            - (width, height) tuple for final letterbox size
        - If ``box_is_padding=True``: Can be either:
            - (padding) for uniform padding
            - (horizontal_padding, vertical_padding) applied to both sides of
              each axis, the image is centered in the resulting box
            - (left, top, right, bottom) for individual padding on each side
    :param box_is_padding: The ``box_size`` argument should be interpreted as padding?
    :param box_color: What color to use for the letter box background, the default is black.
        This should be specified as a HEX color code, or as a 3 tuple of integer or floating
        point RGB values, or as a single integer or float representing all color channels.
    :param inner_size: The size of the inner image, defaults to the size of ``img``.
        It is clamped so that it never exceeds the final box size.
    :param aspect_correct: Should the size of the inner image be aspect correct?
    :raise ValueError: If ``box_is_padding=True`` and ``box_size`` is not an int
        or a sequence of 2 or 4 values.
    :return: The letterboxed image
    """
    if inner_size is None:
        inner_size = img.size

    # Top-left paste position dictated by the padding values. Stays None
    # whenever the image should simply be centered in the final box.
    fixed_origin = None

    if box_is_padding:
        if isinstance(box_size, int):
            # Single integer: uniform padding on all sides
            final_box_size = (inner_size[0] + 2 * box_size,
                              inner_size[1] + 2 * box_size)
            fixed_origin = (box_size, box_size)
        elif len(box_size) == 2:
            # Two values: (horizontal_padding, vertical_padding)
            horizontal_padding, vertical_padding = box_size
            final_box_size = (inner_size[0] + 2 * horizontal_padding,
                              inner_size[1] + 2 * vertical_padding)
        elif len(box_size) == 4:
            # Four values: (left, top, right, bottom)
            padding_left, padding_top, padding_right, padding_bottom = box_size
            final_box_size = (inner_size[0] + padding_left + padding_right,
                              inner_size[1] + padding_top + padding_bottom)
            fixed_origin = (padding_left, padding_top)
        else:
            raise ValueError("box_size must be an int, 2-tuple, or 4-tuple when box_is_padding=True")
    elif isinstance(box_size, int):
        # Single integer: square letterbox
        final_box_size = (box_size, box_size)
    else:
        # Two values: (width, height)
        final_box_size = box_size

    # Ensure inner_size fits within the final box
    inner_size = (min(inner_size[0], final_box_size[0]),
                  min(inner_size[1], final_box_size[1]))

    if box_color is None:
        box_color = 0

    letterbox = PIL.Image.new('RGB', final_box_size, box_color)

    img = resize_image(img, inner_size, aspect_correct=aspect_correct)

    if fixed_origin is not None:
        # Use the specific padding values for positioning
        x, y = fixed_origin
    else:
        # Center the image, measured against its size after the resize
        x = (final_box_size[0] - img.size[0]) // 2
        y = (final_box_size[1] - img.size[1]) // 2

    letterbox.paste(img, (x, y))

    if hasattr(img, 'filename'):
        letterbox.filename = img.filename

    return letterbox




[docs]
def to_rgb(img: PIL.Image.Image):
    """
    Produce an RGB copy of a :py:class:`PIL.Image.Image`, carrying over its
    ``filename`` attribute when the source image has one.

    :param img: the image
    :return: a converted copy of the image
    """
    unset = object()

    converted = img.convert('RGB')
    source_name = getattr(img, 'filename', unset)

    if source_name is not unset:
        converted.filename = source_name

    return converted




[docs]
def get_filename(img: PIL.Image.Image):
    """
    Fetch the ``filename`` attribute of an image, falling back to the
    string "NO_FILENAME" when the image has no such attribute.

    :param img: :py:class:`PIL.Image.Image`
    :return: the value of the filename attribute, or "NO_FILENAME"
    """

    return getattr(img, 'filename', 'NO_FILENAME')




[docs]
def create_jpeg_exif_with_user_comment(comment: str) -> bytes:
    """
    Build JPEG EXIF data holding a user comment field, the result can be
    used with ``PIL.Image.save(img, exif=...)``.

    This function is specifically for saving JPEG files only.

    :param comment: text to store in the EXIF ``UserComment`` field,
        it is dumped with the ``unicode`` encoding
    :return: EXIF data (bytes)
    """
    encoded_comment = piexif.helper.UserComment.dump(
        comment, encoding="unicode"
    )

    exif_dict = {
        "0th": {},
        "Exif": {piexif.ExifIFD.UserComment: encoded_comment},
        "GPS": {},
        "1st": {},
        "thumbnail": None,
    }

    return piexif.dump(exif_dict)




[docs]
def read_jpeg_exif_user_comment(img: PIL.Image.Image) -> str | None:
    """
    Extract the user comment field from the EXIF data of a JPEG image,
    such as one obtained from ``PIL.Image.open(...)``.

    This function is specifically for JPEG files only.

    :param img: :py:class:`PIL.Image.Image`
    :return: the user comment string, or ``None`` if the image carries no
        EXIF data or the user comment field is missing or empty
    """
    if "exif" not in img.info:
        return None

    exif_section = piexif.load(img.info["exif"])["Exif"]
    raw_comment = exif_section.get(piexif.ExifIFD.UserComment)

    if not raw_comment:
        return None

    return piexif.helper.UserComment.load(raw_comment)