# Source code for dgenerate.imageprocessors.paste

# Copyright (c) 2023, Teriks
#
# dgenerate is distributed under the following BSD 3-Clause License
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os

import PIL.Image
import PIL.ImageFilter

import dgenerate.image as _image
import dgenerate.imageprocessors.imageprocessor as _imageprocessor
import dgenerate.textprocessing as _textprocessing
import dgenerate.types as _types
import dgenerate.webcache as _webcache


class PasteProcessor(_imageprocessor.ImageProcessor):
    """
    Paste an image on top of the incoming image at a specified position.

    The "image" argument specifies the path to the image file to paste, this may be
    a path on disk or a URL link to an image file.

    The "image-processors" argument allows you to pre-process "image" with an arbitrary
    image processor chain. This argument's value must be quoted (single or double string quotes)
    if you intend to supply arguments to the processors in the chain. The pixel alignment of
    this processor chain defaults to 1, meaning no forced alignment will occur, you can force
    alignment using the "resize" image processor if desired.

    The "position" argument specifies where to paste the image. It can be:

    NOWRAP!
    - "LEFTxTOP" format (e.g., "100x50") to specify the top-left coordinate
    - "LEFTxTOPxRIGHTxBOTTOM" format (e.g., "100x50x300x200") to specify a bounding box where the source image will be resized to fit

    The "feather" argument specifies the feathering radius in pixels for softening edges.
    This creates smooth transitions from opaque to transparent. If not specified, no feathering
    is applied. Cannot be used together with the "mask" parameter, as this auto generates
    a feather mask for you.

    The "feather-shape" argument controls the shape of the feathering:

    NOWRAP!
    - "r" or "rect" or "rectangle" (default): Rectangular feathering from edges
    - "c" or "circle" or "ellipse": Elliptical feathering from center

    Only used when "feather" is specified.

    The "mask" argument allows you to specify a mask image path that will be used to control
    the transparency of the pasted image. This may be a path on disk or a URL link to an image file.
    The mask should be a grayscale image where white areas represent full opacity and black areas
    represent full transparency. Cannot be used together with the "feather" parameter. This mask will
    always be resized to the size of the pasted image, which may be the "image" argument, or the
    processed image depending on the value of "reverse".

    The "mask-processors" argument allows you to pre-process the "mask" argument with an arbitrary
    image processor chain. For example: invert, gaussian-blur, etc. This cannot be used in "feather"
    mode on the auto generated feather mask, only on user supplied masks. This argument's value must
    be quoted (single or double string quotes) if you intend to supply arguments to the processors
    in the chain. The pixel alignment of this processor chain defaults to 1, meaning no forced alignment
    will occur, you can force alignment using the "resize" image processor if desired.

    The "position-mask" argument allows you to specify a mask image, which will have its white
    area bounds detected to determine the value of "position" for pasting. A bounding box will
    be determined by looking at the image and finding the extents of the white pixels in the mask.
    If you specify an image, the "position" argument will be ignored. This mask will always be
    resized to the size of the background image, which may be the processed image or the "image"
    argument depending on the value of "reverse".

    The "position-mask-padding" argument allows you to specify a padding value for the
    bounding box detection on "position-mask", this allows you to add positive or negative
    padding to the detected mask bounding box. This value should be a single integer (uniform),
    or WIDTHxHEIGHT (horizontal and vertical padding), or (LEFTxTOPxRIGHTxBOTTOM)

    The "position-mask-processors" argument allows you to pre-process the "position-mask" argument
    with an arbitrary image processor chain. For example: invert, gaussian-blur, etc. This cannot be
    used in "feather" mode on the auto generated feather mask, only on user supplied masks. This
    argument's value must be quoted (single or double string quotes) if you intend to supply arguments
    to the processors in the chain. The pixel alignment of this processor chain defaults to 1, meaning
    no forced alignment will occur, you can force alignment using the "resize" image processor if desired.

    The "reverse" argument allows you to reverse the paste operation, meaning the "image" argument
    is to be considered the background, and the processed image is to be the pasted content.

    The "pre-resize" argument determines if the processing occurs before or after dgenerate resizes the image.
    This defaults to False, meaning the image is processed after dgenerate is done resizing it.
    """

    NAMES = ['paste']

    OPTION_ARGS = {
        'feather_shape': ['r', 'rect', 'rectangle', 'c', 'circle', 'ellipse'],
    }

    FILE_ARGS = {
        'image': {'mode': 'in', 'filetypes': [('Images', _imageprocessor.ImageProcessor.image_in_filetypes())]},
        'mask': {'mode': 'in', 'filetypes': [('Images', _imageprocessor.ImageProcessor.image_in_filetypes())]},
        'position_mask': {'mode': 'in', 'filetypes': [('Images', _imageprocessor.ImageProcessor.image_in_filetypes())]}
    }

    @classmethod
    def inheritable_help(cls, loaded_by_name):
        """
        Return help text overrides for arguments inherited from the base class.

        :param loaded_by_name: name the processor was loaded by (unused here)
        :return: dict mapping argument name to its help message
        """
        help_messages = {
            'device': (
                'The "device" argument can be used to set the device '
                'the image-processors/mask-processors will run on, for example: cpu, cuda, cuda:1.'
            ),
            'model-offload': (
                'The "model-offload" argument can be used to enable '
                'cpu model offloading for the image-processors/mask-processors. If this is disabled, '
                'any torch tensors or modules placed on the GPU will remain there until '
                'the image-processor/mask-processor is done being used, instead of them being moved back to the CPU '
                'after each invocation. Enabling this may help save VRAM when using multiple image/mask processors '
                'that make use of the GPU.'
            )
        }
        return help_messages

    def __init__(self,
                 image: str,
                 image_processors: str | None = None,
                 position: str | None = None,
                 feather: int | None = None,
                 feather_shape: str = "rectangle",
                 mask: str | None = None,
                 mask_processors: str | None = None,
                 position_mask: str | None = None,
                 position_mask_padding: str | int | None = None,
                 position_mask_processors: str | None = None,
                 reverse: bool = False,
                 pre_resize: bool = False,
                 **kwargs):
        """
        :param image: path to the image file to paste, or paste on to if ``reverse=True``
        :param image_processors: Pre-process ``image`` with an arbitrary image processor chain
        :param position: position specification in "LEFTxTOP" or "LEFTxTOPxRIGHTxBOTTOM" format
        :param feather: feathering radius in pixels for softening edges (cannot be used with mask)
        :param feather_shape: shape of feathering ("rectangle", "rect", "circle", or "ellipse")
        :param mask: path to a mask image file for controlling transparency (cannot be used with feather)
        :param mask_processors: Pre-process ``mask`` with an arbitrary image processor chain,
            not compatible with ``feather``.
        :param position_mask: path to a mask image file for determining paste position from white area bounds
        :param position_mask_padding: padding value for the position mask bounding box (default "0")
        :param position_mask_processors: Pre-process ``position_mask`` with an arbitrary image processor chain
        :param reverse: Reverse the paste operation?
        :param pre_resize: process the image before it is resized, or after? default is False (after)
        :param kwargs: forwarded to base class

        :raises: the error produced by ``self.argument_error`` when arguments
            conflict, are malformed, or reference files that cannot be loaded.
        """
        super().__init__(**kwargs)

        # --- argument combination checks -------------------------------
        if feather is not None and mask is not None:
            raise self.argument_error(
                'Cannot use both "feather" and "mask" arguments together. '
                'Choose one method for transparency.'
            )

        if mask is None and mask_processors:
            raise self.argument_error(
                'Cannot use "mask-processors" without specifying "mask"'
            )

        if position_mask is None and position_mask_processors:
            raise self.argument_error(
                'Cannot use "position-mask-processors" without specifying "position-mask"'
            )

        if position_mask is None and position_mask_padding is not None:
            raise self.argument_error(
                'Cannot use "position-mask-padding" without specifying "position-mask"'
            )

        if position_mask_padding is None:
            position_mask_padding = 0

        if position is None:
            position = "0x0"

        if feather is not None and feather < 0:
            raise self.argument_error(
                'Feather value must be greater than or equal to 0')

        # The feather shape only matters (and is only validated) in feather mode.
        if feather is not None and feather_shape is not None:
            try:
                parsed_shape = _textprocessing.parse_basic_mask_shape(feather_shape)
            except ValueError:
                parsed_shape = None

            if parsed_shape is None or parsed_shape not in {
                _textprocessing.BasicMaskShape.RECTANGLE,
                _textprocessing.BasicMaskShape.ELLIPSE
            }:
                raise self.argument_error(
                    'Feather shape must be: "r", "rect", "rectangle", or "c", "circle", "ellipse"')

        self._feather = feather
        self._feather_shape = feather_shape
        self._pre_resize = pre_resize
        self._reverse = reverse

        # --- load the "image" argument upfront, always as RGB ----------
        self._paste_image = self._load_image_argument('image', image)
        if self._paste_image.mode != 'RGB':
            self._paste_image = self._paste_image.convert('RGB')

        if image_processors:
            self._paste_image = self._run_image_processor(
                image_processors,
                self._paste_image,
                resize_resolution=None,
                aspect_correct=False,
                align=1
            )

        # --- load the optional transparency mask upfront ---------------
        self._mask_image = None
        if mask is not None:
            self._mask_image = self._load_image_argument('mask', mask)

            if mask_processors:
                self._mask_image = self._run_image_processor(
                    mask_processors,
                    self._mask_image,
                    aspect_correct=False,
                    resize_resolution=None,
                    align=1
                )

            # PIL paste masks are used here in single channel grayscale form
            if self._mask_image.mode != 'L':
                self._mask_image = self._mask_image.convert('L')

        # --- position handling ----------------------------------------
        # The position mask must be resized to the background image, whose
        # size is only known at process time, so only its settings are kept.
        if position_mask is not None:
            self._position_mask_path = position_mask
            self._position_mask_processors = position_mask_processors
            self._position_mask_padding = position_mask_padding
        else:
            # No position mask, parse the "position" argument normally
            self._position_mask_path = None
            self._position_mask_processors = None
            self._position_mask_padding = None
            self._position = self._parse_position(position)

    def _run_image_processor(
            self,
            uri_chain_string,
            image,
            resize_resolution,
            aspect_correct,
            align,
    ):
        """
        Run an image processor chain, given as a URI chain string, on an image.

        :param uri_chain_string: shell-quoted processor chain specification
        :param image: image to process, converted to RGB first if needed
        :param resize_resolution: forwarded to the processor's ``process`` call
        :param aspect_correct: forwarded to the processor's ``process`` call
        :param align: forwarded to the processor's ``process`` call
        :return: the value returned by the processor's ``process`` call
        """
        import dgenerate.imageprocessors as _imgp

        # Convert image to RGB mode for consistent processing
        if image.mode != 'RGB':
            image = image.convert('RGB')

        processor = _imgp.create_image_processor(
            _textprocessing.shell_parse(
                uri_chain_string,
                expand_home=False,
                expand_glob=False,
                expand_vars=False
            ),
            device=self.device,
            model_offload=self.model_offload,
        )

        try:
            return processor.process(
                image,
                resize_resolution=resize_resolution,
                aspect_correct=aspect_correct,
                align=align
            )
        finally:
            # always hand the processor back to the cpu, even on failure
            processor.to('cpu')

    def _load_image(self, image_path: str) -> PIL.Image.Image:
        """
        Load an image from a file or URL.

        The pixel data is decoded immediately rather than lazily, so decode
        errors are raised here (where callers can handle them) and so that
        Pillow can release the underlying file handle.

        :param image_path: path on disk, or a downloadable URL
        :return: the fully loaded image
        """
        # Handle URL downloads using webcache
        if _webcache.is_downloadable_url(image_path):
            # Download and cache the URL
            _, image_path = _webcache.create_web_cache_file(
                image_path,
                mime_acceptable_desc='image files',
                mimetype_is_supported=lambda m: m.startswith('image/'),
                local_files_only=self.local_files_only
            )

        loaded = PIL.Image.open(image_path)
        try:
            # PIL.Image.open is lazy; force the decode now.
            loaded.load()
        except Exception:
            # do not leak the open file handle when decoding fails
            loaded.close()
            raise
        return loaded

    def _load_image_argument(self, name: str, path: str) -> PIL.Image.Image:
        """
        Load the image referred to by a processor argument, reporting problems as argument errors.

        :param name: user facing argument name, used in error messages
        :param path: path on disk, or a downloadable URL
        :return: the loaded image
        """
        if not _webcache.is_downloadable_url(path) and not os.path.exists(path):
            raise self.argument_error(f'Argument "{name}" file does not exist: {path}')

        try:
            return self._load_image(path)
        except Exception as e:
            raise self.argument_error(f'Failed to load argument "{name}": {e}') from e

    def _parse_position(self, position: str) -> tuple:
        """
        Parse a position string into coordinates.

        :param position: "LEFTxTOP" or "LEFTxTOPxRIGHTxBOTTOM"
        :return: the parsed dimensions, with either 2 or 4 components
        """
        try:
            parts = _textprocessing.parse_dimensions(position)
        except ValueError as e:
            raise self.argument_error(
                f'Invalid position format: {position}. Expected "LEFTxTOP" or "LEFTxTOPxRIGHTxBOTTOM"'
            ) from e

        if len(parts) == 2:
            # LEFTxTOP format
            left, top = parts
            if left < 0 or top < 0:
                raise self.argument_error('Position coordinates must be non-negative')
            return parts

        if len(parts) == 4:
            # LEFTxTOPxRIGHTxBOTTOM format
            left, top, right, bottom = parts
            if left < 0 or top < 0 or right < 0 or bottom < 0:
                raise self.argument_error('Position coordinates must be non-negative')
            if left >= right or top >= bottom:
                raise self.argument_error(
                    'Invalid bounding box: left must be less than right and top must be less than bottom')
            return parts

        raise self.argument_error(
            f'Invalid position format: {position}. Expected "LEFTxTOP" or "LEFTxTOPxRIGHTxBOTTOM"')

    def _position_from_mask(self, background_size: tuple):
        """
        Determine the paste position from the white area of the "position-mask" image.

        :param background_size: size of the image being pasted on to, the
            position mask is resized to this size before bounds detection
        :return: the bounds reported by ``dgenerate.image.find_mask_bounds``
        """
        position_mask_image = self._load_image_argument(
            'position-mask', self._position_mask_path)

        if self._position_mask_processors:
            position_mask_image = self._run_image_processor(
                self._position_mask_processors,
                position_mask_image,
                resize_resolution=background_size,
                aspect_correct=False,
                align=1
            )
        else:
            position_mask_image = position_mask_image.resize(background_size)

        # Convert to grayscale if needed
        if position_mask_image.mode != 'L':
            position_mask_image = position_mask_image.convert('L')

        try:
            bounds = _image.find_mask_bounds(position_mask_image, self._position_mask_padding)
        except ValueError as e:
            # could not parse bounds
            raise self.argument_error(f'Error in "position-mask-padding" argument: {e}') from e

        if bounds is None:
            raise self.argument_error('No white pixels found in position mask image.')

        return bounds

    def _process(self, image: PIL.Image.Image) -> PIL.Image.Image:
        """
        Paste the foreground image on to the background image.

        Without ``reverse`` the incoming image is the background and the
        "image" argument is pasted; with ``reverse`` the roles are swapped.
        Neither the incoming image nor the preloaded "image" argument is
        modified, the paste happens on a copy.

        :param image: the incoming image
        :return: a new image with the paste applied
        """
        if self._reverse:
            background_image = self._paste_image.copy()
            paste_image = image
        else:
            background_image = image.copy()
            paste_image = self._paste_image

        if self._position_mask_path:
            # Overrides the "position" argument. The position mask follows the
            # size of the image being pasted on to, which depends on "reverse"
            # and on the incoming image, so it is evaluated on every call and
            # kept local instead of being stored on the processor.
            position = self._position_from_mask(background_image.size)
        else:
            position = self._position

        if len(position) == 2:
            left, top = position
            paste_box = (left, top)  # PIL expects (left, upper)
            paste_size = paste_image.size
        elif len(position) == 4:
            left, top, right, bottom = position
            if right <= left or bottom <= top:
                # e.g. negative "position-mask-padding" collapsing the box,
                # resizing to such a box cannot succeed
                raise self.argument_error(
                    f'Paste bounding box has no area: {left}x{top}x{right}x{bottom}')

            paste_box = (left, top, right, bottom)  # PIL expects (left, upper, right, lower)
            paste_size = (right - left, bottom - top)

            # Resize source image to fit the bounding box
            resampling = _image.best_pil_resampling(paste_image.size, paste_size)
            paste_image = paste_image.resize(paste_size, resampling)
        else:
            # explicit raise, an assert would vanish under "python -O"
            raise ValueError(
                f"invalid paste point / box with {len(position)} dimensions")

        # Handle transparency - either feather or mask, but not both
        if self._feather is not None:
            # Use paste_with_feather for smooth blending
            background_image = _image.paste_with_feather(
                background=background_image,
                foreground=paste_image,
                feather=self._feather,
                shape=self._feather_shape,
                location=paste_box
            )
        elif self._mask_image is not None:
            # Apply custom mask, it must match the size of what is pasted
            mask_resampling = _image.best_pil_resampling(self._mask_image.size, paste_size)
            mask = self._mask_image.resize(paste_size, mask_resampling)
            background_image.paste(paste_image, paste_box, mask)
        else:
            # Simple paste without transparency
            background_image.paste(paste_image, paste_box)

        return background_image

    def impl_pre_resize(self, image: PIL.Image.Image, resize_resolution: _types.OptionalSize):
        """
        Process the image before resizing if pre_resize is True.

        :param image: image to process
        :param resize_resolution: purely informational, is unused by this processor
        :return: the processed image
        """
        if self._pre_resize:
            return self._process(image)
        return image

    def impl_post_resize(self, image: PIL.Image.Image):
        """
        Process the image after resizing if pre_resize is False.

        :param image: image to process
        :return: the processed image
        """
        if not self._pre_resize:
            return self._process(image)
        return image

    def to(self, device) -> "PasteProcessor":
        """
        Does nothing for this processor since it's PIL-based.

        :param device: the device (ignored)
        :return: this processor
        """
        return self