Source code for dgenerate.pipelinewrapper.uris.textualinversionuri

# Copyright (c) 2023, Teriks
#
# dgenerate is distributed under the following BSD 3-Clause License
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os.path

import diffusers
import huggingface_hub
import safetensors
import safetensors.torch
import torch

import dgenerate.messages as _messages
import dgenerate.pipelinewrapper.hfutil as _hfutil
import dgenerate.textprocessing as _textprocessing
import dgenerate.types as _types
from dgenerate.pipelinewrapper.uris import exceptions as _exceptions

# Shared parser for ``--textual-inversions`` URIs. The list names the URI
# arguments that TextualInversionUri.parse reads back out of the parse result.
_textual_inversion_uri_parser = _textprocessing.ConceptUriParser(
    'Textual Inversion',
    ['token', 'revision', 'subfolder', 'weight-name'],
)


def _load_textual_inversion_state_dict(pretrained_model_name_or_path, **kwargs):
    """
    Locate a textual inversion weights file and load its state dict onto the CPU.

    The file is resolved with diffusers' ``_get_model_file``. A safetensors load is
    attempted first when ``weight_name`` ends with ``.safetensors``, or when no
    ``weight_name`` is given and ``use_safetensors`` is truthy or unspecified.
    Only when ``use_safetensors`` is left unspecified is a failure of that attempt
    swallowed, in which case the file is resolved again (``learned_embeds.bin`` when
    no ``weight_name`` is given) and read with ``torch.load``.

    :param pretrained_model_name_or_path: forwarded unchanged to ``_get_model_file``
    :param kwargs: recognized keys are ``cache_dir``, ``force_download``,
        ``resume_download``, ``proxies``, ``local_files_only``, ``token``,
        ``revision``, ``subfolder``, ``weight_name`` and ``use_safetensors``;
        any other key is ignored.

    :return: tuple of ``(model_file, state_dict)``
    """
    from diffusers.utils.hub_utils import _get_model_file

    default_bin_name = "learned_embeds.bin"
    default_safetensors_name = "learned_embeds.safetensors"

    weight_name = kwargs.get("weight_name")
    use_safetensors = kwargs.get("use_safetensors")

    # Options shared by both file lookups below.
    fetch_options = {
        "cache_dir": kwargs.get("cache_dir"),
        "force_download": kwargs.get("force_download", False),
        "resume_download": kwargs.get("resume_download", False),
        "proxies": kwargs.get("proxies"),
        "local_files_only": kwargs.get("local_files_only"),
        "token": kwargs.get("token"),
        "revision": kwargs.get("revision"),
        "subfolder": kwargs.get("subfolder"),
        "user_agent": {
            "file_type": "text_inversion",
            "framework": "pytorch",
        },
    }

    # Falling back to the pickle based loader is only permitted when the
    # caller did not state a preference for or against safetensors.
    pickle_fallback = use_safetensors is None
    if pickle_fallback:
        use_safetensors = True

    if weight_name is None:
        want_safetensors = bool(use_safetensors)
    else:
        want_safetensors = weight_name.endswith(".safetensors")

    model_file = None
    state_dict = None

    if want_safetensors:
        try:
            model_file = _get_model_file(
                pretrained_model_name_or_path,
                weights_name=weight_name or default_safetensors_name,
                **fetch_options,
            )
            state_dict = safetensors.torch.load_file(model_file, device="cpu")
        except Exception:
            if not pickle_fallback:
                raise

            # Forget the safetensors file so the lookup below runs.
            model_file = None

    if model_file is None:
        model_file = _get_model_file(
            pretrained_model_name_or_path,
            weights_name=weight_name or default_bin_name,
            **fetch_options,
        )
        # NOTE(review): depending on the installed torch version's default for
        # ``weights_only``, torch.load may unpickle arbitrary objects; only
        # files from trusted sources should reach this point.
        state_dict = torch.load(model_file, map_location="cpu")

    return model_file, state_dict


class TextualInversionUri:
    """
    Representation of ``--textual-inversions`` uri
    """

    @property
    def model(self) -> str:
        """
        Model path, huggingface slug, file path
        """
        return self._model

    @property
    def revision(self) -> _types.OptionalString:
        """
        Model repo revision
        """
        return self._revision

    @property
    def subfolder(self) -> _types.OptionalPath:
        """
        Model repo subfolder
        """
        return self._subfolder

    @property
    def weight_name(self) -> _types.OptionalName:
        """
        Model weight-name
        """
        return self._weight_name

    @property
    def token(self) -> _types.OptionalString:
        """
        Prompt keyword
        """
        return self._token

    def __init__(self,
                 model: str,
                 token: str | None = None,
                 revision: _types.OptionalString = None,
                 subfolder: _types.OptionalPath = None,
                 weight_name: _types.OptionalName = None):
        """
        :param model: model path, huggingface slug, file path
        :param token: optional prompt keyword
        :param revision: optional model repo revision
        :param subfolder: optional model repo subfolder
        :param weight_name: optional model weight-name
        """
        self._token = token
        self._model = model
        self._revision = revision
        self._subfolder = subfolder
        self._weight_name = weight_name

    def __str__(self):
        return f'{self.__class__.__name__}({str(_types.get_public_attributes(self))})'

    def __repr__(self):
        return str(self)

    def load_on_pipeline(self,
                         pipeline: diffusers.DiffusionPipeline,
                         use_auth_token: _types.OptionalString = None,
                         local_files_only: bool = False):
        """
        Load Textual Inversion weights on to a pipeline using this URI

        :param pipeline: :py:class:`diffusers.DiffusionPipeline`
        :param use_auth_token: optional huggingface auth token.
        :param local_files_only: avoid downloading files and only look for cached files
            when the model path is a huggingface slug

        :raises ModelNotFoundError: If the model could not be found.
        :raises TextualInversionUriLoadError: On any other error while loading.
        """
        try:
            self._load_on_pipeline(pipeline=pipeline,
                                   use_auth_token=use_auth_token,
                                   local_files_only=local_files_only)
        except (huggingface_hub.utils.HFValidationError,
                huggingface_hub.utils.HfHubHTTPError) as e:
            raise _hfutil.ModelNotFoundError(e) from e
        except Exception as e:
            raise _exceptions.TextualInversionUriLoadError(
                f'error loading textual inversion "{self.model}": {e}') from e

    def _load_on_pipeline(self,
                          pipeline: diffusers.DiffusionPipeline,
                          use_auth_token: _types.OptionalString = None,
                          local_files_only: bool = False):
        """
        Implementation of :py:meth:`.load_on_pipeline` without exception translation.

        :param pipeline: :py:class:`diffusers.DiffusionPipeline`
        :param use_auth_token: optional huggingface auth token, exported through the
            ``HF_TOKEN`` environment variable for the duration of the load.
        :param local_files_only: forwarded to the loading calls

        :raises RuntimeError: If the pipeline has no ``load_textual_inversion`` method,
            or an SDXL state dict lacks the ``clip_l`` / ``clip_g`` keys.
        """
        if not hasattr(pipeline, 'load_textual_inversion'):
            raise RuntimeError(f'Pipeline: {pipeline.__class__.__name__} '
                               f'does not support loading textual inversions.')

        debug_args = {'use_auth_token': use_auth_token,
                      'local_files_only': local_files_only}

        _messages.debug_log('pipeline.load_textual_inversion('
                            + str(_types.get_public_attributes(self) | debug_args) + ')')

        model_path = _hfutil.download_non_hf_model(self.model)

        # load_textual_inversion has a positional argument named 'token' (the
        # prompt token) and also accepts a kwargs value with the key 'token',
        # so the auth token is handed over through the HF_TOKEN environment
        # variable instead of an argument.
        override_token = use_auth_token is not None
        old_token = os.environ.get('HF_TOKEN', None)

        if override_token:
            os.environ['HF_TOKEN'] = use_auth_token

        try:
            if pipeline.__class__.__name__.startswith('StableDiffusionXL'):
                filename, dicts = _load_textual_inversion_state_dict(
                    model_path,
                    revision=self.revision,
                    subfolder=self.subfolder,
                    weight_name=self.weight_name,
                    local_files_only=local_files_only
                )

                if 'clip_l' not in dicts or 'clip_g' not in dicts:
                    raise RuntimeError(
                        'clip_l or clip_g not found in SDXL textual '
                        f'inversion model "{self.model}" state dict, '
                        'unsupported model format.')

                # token is the file name (no extension) with spaces
                # replaced by underscores when the user does not provide
                # a prompt token
                if self.token is None:
                    token = os.path.splitext(
                        os.path.basename(filename))[0].replace(' ', '_')
                else:
                    token = self.token

                pipeline.load_textual_inversion(dicts['clip_l'],
                                                token=token,
                                                text_encoder=pipeline.text_encoder,
                                                tokenizer=pipeline.tokenizer)

                pipeline.load_textual_inversion(dicts['clip_g'],
                                                token=token,
                                                text_encoder=pipeline.text_encoder_2,
                                                tokenizer=pipeline.tokenizer_2)
            else:
                pipeline.load_textual_inversion(model_path,
                                                token=self.token,
                                                revision=self.revision,
                                                subfolder=self.subfolder,
                                                weight_name=self.weight_name,
                                                local_files_only=local_files_only)
        finally:
            # Put the environment back exactly as it was found. When HF_TOKEN
            # was not set beforehand it must be removed again, otherwise the
            # auth token would stay in the process environment after the call.
            if override_token:
                if old_token is None:
                    os.environ.pop('HF_TOKEN', None)
                else:
                    os.environ['HF_TOKEN'] = old_token

        _messages.debug_log(f'Added Textual Inversion: "{self}" to pipeline: "{pipeline.__class__.__name__}"')

    @staticmethod
    def parse(uri: _types.Uri) -> 'TextualInversionUri':
        """
        Parse a ``--textual-inversions`` uri and return an object representing its constituents

        :param uri: string with ``--textual-inversions`` uri syntax

        :raise InvalidTextualInversionUriError:

        :return: :py:class:`.TextualInversionUri`
        """
        try:
            r = _textual_inversion_uri_parser.parse(uri)

            return TextualInversionUri(model=r.concept,
                                       token=r.args.get('token', None),
                                       weight_name=r.args.get('weight-name', None),
                                       revision=r.args.get('revision', None),
                                       subfolder=r.args.get('subfolder', None))
        except _textprocessing.ConceptUriParseError as e:
            raise _exceptions.InvalidTextualInversionUriError(e) from e